...
parent 838e966dc9
commit 189971509a

@@ -117,13 +117,8 @@ impl ProductComponentBuilder {
     }
 }
 
-<<<<<<< HEAD
 /// Product represents a product or service offered in the system
 #[derive(Debug, Clone, Serialize, Deserialize)]
-=======
-/// Product represents a product or service offered by the Freezone
-#[derive(Debug, Clone, Serialize, Deserialize, CustomType)]
->>>>>>> builders_in_script
 pub struct Product {
     pub id: i64,
     pub name: String,

ourdb/Cargo.lock (generated, 95 changed lines)

@@ -29,6 +29,12 @@ version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
 
+[[package]]
+name = "bitflags"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
+
 [[package]]
 name = "bumpalo"
 version = "3.17.0"
@@ -181,6 +187,22 @@ version = "1.15.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
 
+[[package]]
+name = "errno"
+version = "0.3.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
+
 [[package]]
 name = "getrandom"
 version = "0.2.15"
@@ -189,7 +211,19 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
 dependencies = [
  "cfg-if",
  "libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasi 0.14.2+wasi-0.2.4",
 ]
 
 [[package]]
@@ -250,6 +284,12 @@ version = "0.2.171"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
+
 [[package]]
 name = "log"
 version = "0.4.27"
@@ -291,6 +331,7 @@ dependencies = [
  "criterion",
  "log",
  "rand",
+ "tempfile",
  "thiserror",
 ]
 
@@ -349,6 +390,12 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "r-efi"
+version = "5.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
+
 [[package]]
 name = "rand"
 version = "0.8.5"
@@ -376,7 +423,7 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
 ]
 
 [[package]]
@@ -428,6 +475,19 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
 
+[[package]]
+name = "rustix"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf"
+dependencies = [
+ "bitflags",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys",
+]
+
 [[package]]
 name = "rustversion"
 version = "1.0.20"
@@ -492,6 +552,19 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "tempfile"
+version = "3.19.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf"
+dependencies = [
+ "fastrand",
+ "getrandom 0.3.2",
+ "once_cell",
+ "rustix",
+ "windows-sys",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.69"
@@ -544,6 +617,15 @@ version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
+[[package]]
+name = "wasi"
+version = "0.14.2+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
+dependencies = [
+ "wit-bindgen-rt",
+]
+
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.100"
@@ -694,6 +776,15 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "wit-bindgen-rt"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
+dependencies = [
+ "bitflags",
+]
+
 [[package]]
 name = "zerocopy"
 version = "0.8.24"

@@ -13,6 +13,7 @@ rand = "0.8.5"
 
 [dev-dependencies]
 criterion = "0.5.1"
+tempfile = "3.8.0"
 
 [[bench]]
 name = "ourdb_benchmarks"

@@ -26,7 +26,7 @@ use std::path::PathBuf;
 fn main() -> Result<(), ourdb::Error> {
     // Create a new database
     let config = OurDBConfig {
-        path: PathBuf::from("/path/to/db"),
+        path: PathBuf::from("/tmp/ourdb"),
         incremental_mode: true,
         file_size: None, // Use default (500MB)
         keysize: None, // Use default (4 bytes)

ourdb/benches/ourdb_benchmarks.rs (new file, 277 lines)

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
use tempfile::tempdir;

fn criterion_benchmark(c: &mut Criterion) {
    // Create a temporary directory for benchmarks
    let temp_dir = tempdir().expect("Failed to create temp directory");
    let db_path = temp_dir.path().to_path_buf();

    // Benchmark set operation (insertion)
    c.bench_function("set", |b| {
        let config = OurDBConfig {
            path: db_path.clone(),
            incremental_mode: true,
            file_size: Some(10 * 1024 * 1024), // 10MB
            keysize: None,
        };

        let mut db = OurDB::new(config).unwrap();
        let test_data = vec![b'X'; 100]; // 100 bytes of data
        let mut i = 0;

        b.iter(|| {
            let args = OurDBSetArgs {
                id: None, // Let the DB assign an ID
                data: &test_data,
            };
            black_box(db.set(args).unwrap());
            i += 1;
        });
    });

    // Setup database with data for other benchmarks
    let setup_config = OurDBConfig {
        path: db_path.clone(),
        incremental_mode: true,
        file_size: Some(10 * 1024 * 1024), // 10MB
        keysize: None,
    };

    let mut setup_db = OurDB::new(setup_config).unwrap();
    let test_data = vec![b'X'; 100]; // 100 bytes of data
    let mut ids = Vec::with_capacity(1000);

    // Insert 1000 records
    for _ in 0..1000 {
        let args = OurDBSetArgs {
            id: None,
            data: &test_data,
        };
        let id = setup_db.set(args).unwrap();
        ids.push(id);
    }

    // Benchmark get operation
    c.bench_function("get", |b| {
        let config = OurDBConfig {
            path: db_path.clone(),
            incremental_mode: true,
            file_size: Some(10 * 1024 * 1024),
            keysize: None,
        };

        let mut db = OurDB::new(config).unwrap();
        let mut i = 0;

        b.iter(|| {
            let id = ids[i % ids.len()];
            black_box(db.get(id).unwrap());
            i += 1;
        });
    });

    // Benchmark update operation
    c.bench_function("update", |b| {
        let config = OurDBConfig {
            path: db_path.clone(),
            incremental_mode: true,
            file_size: Some(10 * 1024 * 1024),
            keysize: None,
        };

        let mut db = OurDB::new(config).unwrap();
        let updated_data = vec![b'Y'; 100]; // Different data for updates
        let mut i = 0;

        b.iter(|| {
            let id = ids[i % ids.len()];
            let args = OurDBSetArgs {
                id: Some(id),
                data: &updated_data,
            };
            black_box(db.set(args).unwrap());
            i += 1;
        });
    });

    // Benchmark get_history operation
    c.bench_function("get_history", |b| {
        let config = OurDBConfig {
            path: db_path.clone(),
            incremental_mode: true,
            file_size: Some(10 * 1024 * 1024),
            keysize: None,
        };

        let mut db = OurDB::new(config).unwrap();
        let mut i = 0;

        b.iter(|| {
            let id = ids[i % ids.len()];
            black_box(db.get_history(id, 2).unwrap());
            i += 1;
        });
    });

    // Benchmark delete operation
    c.bench_function("delete", |b| {
        // Create a fresh database for deletion benchmarks
        let delete_dir = tempdir().expect("Failed to create temp directory");
        let delete_path = delete_dir.path().to_path_buf();

        let config = OurDBConfig {
            path: delete_path.clone(),
            incremental_mode: true,
            file_size: Some(10 * 1024 * 1024),
            keysize: None,
        };

        let mut db = OurDB::new(config).unwrap();
        let test_data = vec![b'X'; 100];

        // Setup keys to delete
        let mut delete_ids = Vec::with_capacity(1000);
        for _ in 0..1000 {
            let args = OurDBSetArgs {
                id: None,
                data: &test_data,
            };
            let id = db.set(args).unwrap();
            delete_ids.push(id);
        }

        let mut i = 0;
        b.iter(|| {
            let id = delete_ids[i % delete_ids.len()];
            // Only try to delete if it exists (not already deleted)
            if db.get(id).is_ok() {
                black_box(db.delete(id).unwrap());
            }
            i += 1;
        });
    });

    // Benchmark key-value mode vs incremental mode
    let mut group = c.benchmark_group("mode_comparison");

    // Benchmark set in key-value mode
    group.bench_function("set_keyvalue_mode", |b| {
        let kv_dir = tempdir().expect("Failed to create temp directory");
        let kv_path = kv_dir.path().to_path_buf();

        let config = OurDBConfig {
            path: kv_path.clone(),
            incremental_mode: false, // Key-value mode
            file_size: Some(10 * 1024 * 1024),
            keysize: None,
        };

        let mut db = OurDB::new(config).unwrap();
        let test_data = vec![b'X'; 100];
        let mut i = 0;

        b.iter(|| {
            let id = i + 1; // Explicit ID
            let args = OurDBSetArgs {
                id: Some(id as u32),
                data: &test_data,
            };
            black_box(db.set(args).unwrap());
            i += 1;
        });
    });

    // Benchmark set in incremental mode
    group.bench_function("set_incremental_mode", |b| {
        let inc_dir = tempdir().expect("Failed to create temp directory");
        let inc_path = inc_dir.path().to_path_buf();

        let config = OurDBConfig {
            path: inc_path.clone(),
            incremental_mode: true, // Incremental mode
            file_size: Some(10 * 1024 * 1024),
            keysize: None,
        };

        let mut db = OurDB::new(config).unwrap();
        let test_data = vec![b'X'; 100];

        b.iter(|| {
            let args = OurDBSetArgs {
                id: None, // Auto-generated ID
                data: &test_data,
            };
            black_box(db.set(args).unwrap());
        });
    });

    group.finish();

    // Benchmark with different record sizes
    let mut size_group = c.benchmark_group("record_size");

    for &size in &[10, 100, 1000, 10000] {
        size_group.bench_function(format!("set_size_{}", size), |b| {
            let size_dir = tempdir().expect("Failed to create temp directory");
            let size_path = size_dir.path().to_path_buf();

            let config = OurDBConfig {
                path: size_path.clone(),
                incremental_mode: true,
                file_size: Some(10 * 1024 * 1024),
                keysize: None,
            };

            let mut db = OurDB::new(config).unwrap();
            let test_data = vec![b'X'; size];

            b.iter(|| {
                let args = OurDBSetArgs {
                    id: None,
                    data: &test_data,
                };
                black_box(db.set(args).unwrap());
            });
        });

        size_group.bench_function(format!("get_size_{}", size), |b| {
            let size_dir = tempdir().expect("Failed to create temp directory");
            let size_path = size_dir.path().to_path_buf();

            let config = OurDBConfig {
                path: size_path.clone(),
                incremental_mode: true,
                file_size: Some(10 * 1024 * 1024),
                keysize: None,
            };

            let mut db = OurDB::new(config).unwrap();
            let test_data = vec![b'X'; size];

            // Insert some records first
            let mut size_ids = Vec::with_capacity(100);
            for _ in 0..100 {
                let args = OurDBSetArgs {
                    id: None,
                    data: &test_data,
                };
                let id = db.set(args).unwrap();
                size_ids.push(id);
            }

            let mut i = 0;
            b.iter(|| {
                let id = size_ids[i % size_ids.len()];
                black_box(db.get(id).unwrap());
                i += 1;
            });
        });
    }

    size_group.finish();
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

@@ -38,7 +38,7 @@ use radixtree::RadixTree;
 
 fn main() -> Result<(), radixtree::Error> {
     // Create a new radix tree
-    let mut tree = RadixTree::new("/path/to/storage", false)?;
+    let mut tree = RadixTree::new("/tmp/radix", false)?;
 
     // Set key-value pairs
     tree.set("hello", b"world".to_vec())?;
@@ -68,10 +68,10 @@ fn main() -> Result<(), radixtree::Error> {
 
 ```rust
 // Create a new radix tree
-let mut tree = RadixTree::new("/path/to/storage", false)?;
+let mut tree = RadixTree::new("/tmp/radix", false)?;
 
 // Create a new radix tree and reset if it exists
-let mut tree = RadixTree::new("/path/to/storage", true)?;
+let mut tree = RadixTree::new("/tmp/radix", true)?;
 ```
 
 ### Setting Values

radixtree/examples/large_scale_test.rs (new file, 121 lines)

use radixtree::RadixTree;
use std::time::{Duration, Instant};
use std::io::{self, Write};

// Use much smaller batches to avoid hitting OurDB's size limit
const BATCH_SIZE: usize = 1_000;
const NUM_BATCHES: usize = 1_000; // Total records: 1,000,000
const PROGRESS_INTERVAL: usize = 100;

fn main() -> Result<(), radixtree::Error> {
    // Overall metrics
    let total_start_time = Instant::now();
    let mut total_records_inserted = 0;
    let mut batch_times = Vec::with_capacity(NUM_BATCHES);

    println!("Will insert up to {} records in batches of {}",
        BATCH_SIZE * NUM_BATCHES, BATCH_SIZE);

    // Process in batches to avoid OurDB size limits
    for batch in 0..NUM_BATCHES {
        // Create a new database for each batch
        let batch_path = std::env::temp_dir().join(format!("radixtree_batch_{}", batch));

        // Clean up any existing database
        if batch_path.exists() {
            std::fs::remove_dir_all(&batch_path)?;
        }
        std::fs::create_dir_all(&batch_path)?;

        println!("\nBatch {}/{}: Creating new radix tree...", batch + 1, NUM_BATCHES);
        let mut tree = RadixTree::new(batch_path.to_str().unwrap(), true)?;

        let batch_start_time = Instant::now();
        let mut last_progress_time = Instant::now();
        let mut last_progress_count = 0;

        // Insert records for this batch
        for i in 0..BATCH_SIZE {
            let global_index = batch * BATCH_SIZE + i;
            let key = format!("key:{:08}", global_index);
            let value = format!("val{}", global_index).into_bytes();

            tree.set(&key, value)?;

            // Show progress at intervals
            if (i + 1) % PROGRESS_INTERVAL == 0 || i == BATCH_SIZE - 1 {
                let records_since_last = i + 1 - last_progress_count;
                let time_since_last = last_progress_time.elapsed();
                let records_per_second = records_since_last as f64 / time_since_last.as_secs_f64();

                print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
                    i + 1, BATCH_SIZE,
                    (i + 1) as f64 / BATCH_SIZE as f64 * 100.0,
                    records_per_second);
                io::stdout().flush().unwrap();

                last_progress_time = Instant::now();
                last_progress_count = i + 1;
            }
        }

        let batch_duration = batch_start_time.elapsed();
        batch_times.push(batch_duration);
        total_records_inserted += BATCH_SIZE;

        println!("\nBatch {}/{} completed in {:?} ({:.2} records/sec)",
            batch + 1, NUM_BATCHES,
            batch_duration,
            BATCH_SIZE as f64 / batch_duration.as_secs_f64());

        // Test random access performance for this batch
        println!("Testing access performance for batch {}...", batch + 1);
        let mut total_get_time = Duration::new(0, 0);
        let num_samples = 100;

        // Use a simple distribution pattern
        for i in 0..num_samples {
            // Distribute samples across the batch
            let sample_id = batch * BATCH_SIZE + (i * (BATCH_SIZE / num_samples));
            let key = format!("key:{:08}", sample_id);

            let get_start = Instant::now();
            let _ = tree.get(&key)?;
            total_get_time += get_start.elapsed();
        }

        println!("Average time to retrieve a record: {:?}",
            total_get_time / num_samples as u32);

        // Test prefix search performance
        println!("Testing prefix search performance...");
        let prefix = format!("key:{:02}", batch % 100);

        let list_start = Instant::now();
        let keys = tree.list(&prefix)?;
        let list_duration = list_start.elapsed();

        println!("Found {} keys with prefix '{}' in {:?}",
            keys.len(), prefix, list_duration);
    }

    // Overall performance summary
    let total_duration = total_start_time.elapsed();
    println!("\n\nPerformance Summary:");
    println!("Total time to insert {} records: {:?}", total_records_inserted, total_duration);
    println!("Average insertion rate: {:.2} records/second",
        total_records_inserted as f64 / total_duration.as_secs_f64());

    // Show performance trend
    println!("\nPerformance Trend (batch number vs. time):");
    for (i, duration) in batch_times.iter().enumerate() {
        if i % 10 == 0 || i == batch_times.len() - 1 { // Only show every 10th point
            println!(" Batch {}: {:?} ({:.2} records/sec)",
                i + 1,
                duration,
                BATCH_SIZE as f64 / duration.as_secs_f64());
        }
    }

    Ok(())
}

radixtree/examples/performance_test.rs (new file, 134 lines)

use radixtree::RadixTree;
use std::time::{Duration, Instant};
use std::io::{self, Write};

// Number of records to insert
const TOTAL_RECORDS: usize = 1_000_000;
// How often to report progress (every X records)
const PROGRESS_INTERVAL: usize = 10_000;
// How many records to use for performance sampling
const PERFORMANCE_SAMPLE_SIZE: usize = 1000;

fn main() -> Result<(), radixtree::Error> {
    // Create a temporary directory for the database
    let db_path = std::env::temp_dir().join("radixtree_performance_test");

    // Completely remove and recreate the directory to ensure a clean start
    if db_path.exists() {
        std::fs::remove_dir_all(&db_path)?;
    }
    std::fs::create_dir_all(&db_path)?;

    println!("Creating radix tree at: {}", db_path.display());
    println!("Will insert {} records and show progress...", TOTAL_RECORDS);

    // Create a new radix tree
    let mut tree = RadixTree::new(db_path.to_str().unwrap(), true)?;

    // Track overall time
    let start_time = Instant::now();

    // Track performance metrics
    let mut insertion_times = Vec::with_capacity(TOTAL_RECORDS / PROGRESS_INTERVAL);
    let mut last_batch_time = Instant::now();
    let mut last_batch_records = 0;

    // Insert records and track progress
    for i in 0..TOTAL_RECORDS {
        let key = format!("key:{:08}", i);
        // Use smaller values to avoid exceeding OurDB's size limit
        let value = format!("val{}", i).into_bytes();

        // Time the insertion of every Nth record for performance sampling
        if i % PERFORMANCE_SAMPLE_SIZE == 0 {
            let insert_start = Instant::now();
            tree.set(&key, value)?;
            let insert_duration = insert_start.elapsed();

            // Only print detailed timing for specific samples to avoid flooding output
            if i % (PERFORMANCE_SAMPLE_SIZE * 10) == 0 {
                println!("Record {}: Insertion took {:?}", i, insert_duration);
            }
        } else {
            tree.set(&key, value)?;
        }

        // Show progress at intervals
        if (i + 1) % PROGRESS_INTERVAL == 0 || i == TOTAL_RECORDS - 1 {
            let records_in_batch = i + 1 - last_batch_records;
            let batch_duration = last_batch_time.elapsed();
            let records_per_second = records_in_batch as f64 / batch_duration.as_secs_f64();

            insertion_times.push((i + 1, batch_duration));

            print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
                i + 1, TOTAL_RECORDS,
                (i + 1) as f64 / TOTAL_RECORDS as f64 * 100.0,
                records_per_second);
            io::stdout().flush().unwrap();

            last_batch_time = Instant::now();
            last_batch_records = i + 1;
        }
    }

    let total_duration = start_time.elapsed();
    println!("\n\nPerformance Summary:");
    println!("Total time to insert {} records: {:?}", TOTAL_RECORDS, total_duration);
    println!("Average insertion rate: {:.2} records/second",
        TOTAL_RECORDS as f64 / total_duration.as_secs_f64());

    // Show performance trend
    println!("\nPerformance Trend (records inserted vs. time per batch):");
    for (i, (record_count, duration)) in insertion_times.iter().enumerate() {
        if i % 10 == 0 || i == insertion_times.len() - 1 { // Only show every 10th point to avoid too much output
            println!(" After {} records: {:?} for {} records ({:.2} records/sec)",
                record_count,
                duration,
                PROGRESS_INTERVAL,
                PROGRESS_INTERVAL as f64 / duration.as_secs_f64());
        }
    }

    // Test access performance with distributed samples
    println!("\nTesting access performance with distributed samples...");
    let mut total_get_time = Duration::new(0, 0);
    let num_samples = 1000;

    // Use a simple distribution pattern instead of random
    for i in 0..num_samples {
        // Distribute samples across the entire range
        let sample_id = (i * (TOTAL_RECORDS / num_samples)) % TOTAL_RECORDS;
        let key = format!("key:{:08}", sample_id);

        let get_start = Instant::now();
        let _ = tree.get(&key)?;
        total_get_time += get_start.elapsed();
    }

    println!("Average time to retrieve a record: {:?}",
        total_get_time / num_samples as u32);

    // Test prefix search performance
    println!("\nTesting prefix search performance...");
    let prefixes = ["key:0", "key:1", "key:5", "key:9"];

    for prefix in &prefixes {
        let list_start = Instant::now();
        let keys = tree.list(prefix)?;
        let list_duration = list_start.elapsed();

        println!("Found {} keys with prefix '{}' in {:?}",
            keys.len(), prefix, list_duration);
    }

    // Clean up (optional)
    if std::env::var("KEEP_DB").is_err() {
        std::fs::remove_dir_all(&db_path)?;
        println!("\nCleaned up database directory");
    } else {
        println!("\nDatabase kept at: {}", db_path.display());
    }

    Ok(())
}

@@ -13,8 +13,8 @@ pub fn new_radix_tree(path: &str, reset: bool) -> Result<RadixTree, Error> {
     let config = OurDBConfig {
         path: PathBuf::from(path),
         incremental_mode: true,
-        file_size: Some(1024 * 1024), // 1MB file size
-        keysize: Some(4), // Default key size
+        file_size: Some(1024 * 1024 * 10), // 10MB file size for better performance with large datasets
+        keysize: Some(6), // Use keysize=6 to support multiple files (file_nr + position)
     };
 
     let mut db = OurDB::new(config)?;
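The keysize change above is what lets the radix tree span more than one OurDB data file: six bytes leave room for a file number next to the in-file position. A minimal sketch of how such a location could be packed and unpacked, assuming a 2-byte file number and a 4-byte offset (the split and the helper names are illustrative, not OurDB's actual encoding):

```rust
// Sketch only: pack a (file_nr, position) pair into the 6-byte key slot.
fn pack_location(file_nr: u16, position: u32) -> [u8; 6] {
    let mut loc = [0u8; 6];
    loc[..2].copy_from_slice(&file_nr.to_be_bytes()); // which data file
    loc[2..].copy_from_slice(&position.to_be_bytes()); // offset inside that file
    loc
}

fn unpack_location(loc: [u8; 6]) -> (u16, u32) {
    let file_nr = u16::from_be_bytes([loc[0], loc[1]]);
    let position = u32::from_be_bytes([loc[2], loc[3], loc[4], loc[5]]);
    (file_nr, position)
}

fn main() {
    let loc = pack_location(3, 1_048_576);
    assert_eq!(unpack_location(loc), (3, 1_048_576));
}
```

Putting the file number in the high-order bytes keeps all locations within one file contiguous when the packed keys are compared lexicographically.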

tst/Cargo.lock (new generated file, 180 lines)

# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
 "cfg-if",
]

[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
 "cfg-if",
 "libc",
 "wasi",
]

[[package]]
name = "libc"
version = "0.2.172"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"

[[package]]
name = "log"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"

[[package]]
name = "ourdb"
version = "0.1.0"
dependencies = [
 "crc32fast",
 "log",
 "rand",
 "thiserror",
]

[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
 "zerocopy",
]

[[package]]
name = "proc-macro2"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
 "libc",
 "rand_chacha",
 "rand_core",
]

[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
 "ppv-lite86",
 "rand_core",
]

[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
 "getrandom",
]

[[package]]
name = "syn"
version = "2.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
 "thiserror-impl",
]

[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "tst"
version = "0.1.0"
dependencies = [
 "log",
 "ourdb",
 "thiserror",
]

[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"

[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

[[package]]
name = "zerocopy"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879"
dependencies = [
 "zerocopy-derive",
]

[[package]]
name = "zerocopy-derive"
version = "0.8.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

tst/Cargo.toml (new file, 31 lines)

[package]
name = "tst"
version = "0.1.0"
edition = "2021"
description = "A persistent ternary search tree implementation using OurDB for storage"
authors = ["OurWorld Team"]

[dependencies]
ourdb = { path = "../ourdb" }
thiserror = "1.0.40"
log = "0.4.17"

[dev-dependencies]
# criterion = "0.5.1"

# Uncomment when benchmarks are implemented
# [[bench]]
# name = "tst_benchmarks"
# harness = false

[[example]]
name = "basic_usage"
path = "examples/basic_usage.rs"

[[example]]
name = "prefix_ops"
path = "examples/prefix_ops.rs"

[[example]]
name = "performance"
path = "examples/performance.rs"

tst/README.md (new file, 183 lines)

# Ternary Search Tree (TST)

A persistent ternary search tree implementation in Rust using OurDB for storage.

## Overview

TST is a space-optimized tree data structure that enables efficient string key operations with persistent storage. This implementation provides a persistent ternary search tree that can be used for efficient string key operations, such as auto-complete, routing tables, and more.

A ternary search tree is a type of trie where each node has three children: left, middle, and right. Unlike a radix tree, which compresses common prefixes, a TST stores one character per node and uses a binary search tree-like structure for efficient traversal.

Key characteristics:
- Each node stores a single character
- Nodes have three children: left (for characters < current), middle (for the next character in the key), and right (for characters > current)
- Leaf nodes contain the actual values
- Balanced structure for consistent performance across operations

## Features

- Efficient string key operations
- Persistent storage using OurDB backend
- Balanced tree structure for consistent performance
- Support for binary values
- Thread-safe operations through OurDB

## Usage

Add the dependency to your `Cargo.toml`:

```toml
[dependencies]
tst = { path = "../tst" }
```

### Basic Example

```rust
use tst::TST;

fn main() -> Result<(), tst::Error> {
    // Create a new ternary search tree
    let mut tree = TST::new("/tmp/tst", false)?;

    // Set key-value pairs
    tree.set("hello", b"world".to_vec())?;
    tree.set("help", b"me".to_vec())?;

    // Get values by key
    let value = tree.get("hello")?;
    println!("hello: {}", String::from_utf8_lossy(&value)); // Prints: world

    // List keys by prefix
    let keys = tree.list("hel")?; // Returns ["hello", "help"]
    println!("Keys with prefix 'hel': {:?}", keys);

    // Get all values by prefix
    let values = tree.getall("hel")?; // Returns [b"world", b"me"]

    // Delete keys
    tree.delete("help")?;

    Ok(())
}
```

## API

### Creating a TST

```rust
// Create a new ternary search tree
let mut tree = TST::new("/tmp/tst", false)?;

// Create a new ternary search tree and reset if it exists
let mut tree = TST::new("/tmp/tst", true)?;
```

### Setting Values

```rust
// Set a key-value pair
tree.set("key", b"value".to_vec())?;
```

### Getting Values

```rust
// Get a value by key
let value = tree.get("key")?;
```

### Deleting Keys

```rust
// Delete a key
tree.delete("key")?;
```

### Listing Keys by Prefix

```rust
// List all keys with a given prefix
let keys = tree.list("prefix")?;
```

### Getting All Values by Prefix

```rust
// Get all values for keys with a given prefix
let values = tree.getall("prefix")?;
```

## Performance Characteristics

- Search: O(k) where k is the key length
- Insert: O(k) for new keys
- Delete: O(k) plus potential node cleanup
- Space: O(n) where n is the total number of nodes

## Use Cases

TST is particularly useful for:
- Prefix-based searching
- Auto-complete systems
- Dictionary implementations
- Spell checking
- Any application requiring efficient string key operations with persistence

## Implementation Details

The TST implementation uses OurDB for persistent storage:
- Each node is serialized and stored as a record in OurDB
- Node references use OurDB record IDs
- The tree maintains a root node ID for traversal
- Node serialization includes version tracking for format evolution

## Running Tests

The project includes a comprehensive test suite that verifies all functionality:

```bash
# Run all tests
cargo test

# Run specific test files
cargo test --test basic_test
cargo test --test prefix_test
```

## Running Examples

The project includes example applications that demonstrate how to use the TST:

```bash
# Run the basic usage example
cargo run --example basic_usage

# Run the prefix operations example
cargo run --example prefix_ops

# Run the performance test
cargo run --example performance
```

## Comparison with RadixTree

While both TST and RadixTree provide efficient string key operations, they have different characteristics:

- **TST**: Stores one character per node, with a balanced structure for consistent performance across operations.
- **RadixTree**: Compresses common prefixes, which can be more space-efficient for keys with long common prefixes.

Choose TST when:
- You need balanced performance across all operations
- Your keys don't share long common prefixes
- You want a simpler implementation with predictable performance

Choose RadixTree when:
- Space efficiency is a priority
- Your keys share long common prefixes
- You prioritize lookup performance over balanced performance

## License

This project is licensed under the same license as the HeroCode project.
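The README's Implementation Details section says each node is serialized into its own OurDB record and that children are referenced by record IDs. A minimal sketch of what such a node and one traversal step could look like under that scheme; the struct, the `u32` record IDs, and the field names are assumptions for illustration, not the crate's actual types:

```rust
// Sketch only: a persistent TST node whose child pointers are OurDB record IDs.
struct TstNode {
    character: char,        // the single character stored at this node
    value: Option<Vec<u8>>, // present only when a key terminates here
    left_id: Option<u32>,   // child for characters smaller than `character`
    middle_id: Option<u32>, // child for the next character of the key
    right_id: Option<u32>,  // child for characters larger than `character`
}

// One traversal step: branch left/right like a BST on the current character,
// and only follow the middle child (consuming the character) on an exact match.
fn descend(node: &TstNode, ch: char) -> Option<u32> {
    if ch < node.character {
        node.left_id
    } else if ch > node.character {
        node.right_id
    } else {
        node.middle_id
    }
}

fn main() {
    let root = TstNode {
        character: 'h',
        value: None,
        left_id: Some(10),
        middle_id: Some(11),
        right_id: None,
    };
    assert_eq!(descend(&root, 'h'), Some(11)); // match: step to the middle child
    assert_eq!(descend(&root, 'a'), Some(10)); // 'a' < 'h': go left
}
```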

tst/examples/basic_usage.rs (new file, 76 lines)

use tst::TST;
use std::time::Instant;
use std::io::{self, Write};

fn main() -> Result<(), tst::Error> {
    // Create a temporary directory for the database
    let db_path = std::env::temp_dir().join("tst_example");
    std::fs::create_dir_all(&db_path)?;

    println!("Creating ternary search tree at: {}", db_path.display());

    // Create a new TST
    let mut tree = TST::new(db_path.to_str().unwrap(), true)?;

    // Store some data
    println!("Inserting data...");
    tree.set("hello", b"world".to_vec())?;
    tree.set("help", b"me".to_vec())?;
    tree.set("helicopter", b"flying".to_vec())?;
    tree.set("apple", b"fruit".to_vec())?;
    tree.set("application", b"software".to_vec())?;
    tree.set("banana", b"yellow".to_vec())?;

    // Retrieve and print the data
    let value = tree.get("hello")?;
    println!("hello: {}", String::from_utf8_lossy(&value));

    // List keys with prefix
    println!("\nListing keys with prefix 'hel':");
    let start = Instant::now();
    let keys = tree.list("hel")?;
    let duration = start.elapsed();

    for key in &keys {
        println!(" {}", key);
    }
    println!("Found {} keys in {:?}", keys.len(), duration);

    // Get all values with prefix
    println!("\nGetting all values with prefix 'app':");
    let start = Instant::now();
    let values = tree.getall("app")?;
    let duration = start.elapsed();

    for (i, value) in values.iter().enumerate() {
        println!(" Value {}: {}", i + 1, String::from_utf8_lossy(value));
    }
    println!("Found {} values in {:?}", values.len(), duration);

    // Delete a key
    println!("\nDeleting 'help'...");
    tree.delete("help")?;

    // Verify deletion
    println!("Listing keys with prefix 'hel' after deletion:");
    let keys_after = tree.list("hel")?;
    for key in &keys_after {
        println!(" {}", key);
    }

    // Try to get a deleted key
    match tree.get("help") {
        Ok(_) => println!("Unexpectedly found 'help' after deletion!"),
        Err(e) => println!("As expected, 'help' was not found: {}", e),
    }

    // Clean up (optional)
    if std::env::var("KEEP_DB").is_err() {
        std::fs::remove_dir_all(&db_path)?;
        println!("\nCleaned up database directory");
    } else {
        println!("\nDatabase kept at: {}", db_path.display());
    }

    Ok(())
}

tst/examples/performance.rs (new file, 134 lines)

use tst::TST;
use std::time::{Duration, Instant};
use std::io::{self, Write};

// Number of records to insert
const TOTAL_RECORDS: usize = 100_000;
// How often to report progress (every X records)
const PROGRESS_INTERVAL: usize = 1_000;
// How many records to use for performance sampling
const PERFORMANCE_SAMPLE_SIZE: usize = 100;

fn main() -> Result<(), tst::Error> {
    // Create a temporary directory for the database
    let db_path = std::env::temp_dir().join("tst_performance_test");

    // Completely remove and recreate the directory to ensure a clean start
    if db_path.exists() {
        std::fs::remove_dir_all(&db_path)?;
    }
    std::fs::create_dir_all(&db_path)?;

    println!("Creating ternary search tree at: {}", db_path.display());
    println!("Will insert {} records and show progress...", TOTAL_RECORDS);

    // Create a new TST
    let mut tree = TST::new(db_path.to_str().unwrap(), true)?;

    // Track overall time
    let start_time = Instant::now();

    // Track performance metrics
    let mut insertion_times = Vec::with_capacity(TOTAL_RECORDS / PROGRESS_INTERVAL);
    let mut last_batch_time = Instant::now();
    let mut last_batch_records = 0;

    // Insert records and track progress
    for i in 0..TOTAL_RECORDS {
        let key = format!("key:{:08}", i);
        // Use smaller values to avoid exceeding OurDB's size limit
        let value = format!("val{}", i).into_bytes();

        // Time the insertion of every Nth record for performance sampling
        if i % PERFORMANCE_SAMPLE_SIZE == 0 {
            let insert_start = Instant::now();
            tree.set(&key, value)?;
            let insert_duration = insert_start.elapsed();

            // Only print detailed timing for specific samples to avoid flooding output
            if i % (PERFORMANCE_SAMPLE_SIZE * 10) == 0 {
                println!("Record {}: Insertion took {:?}", i, insert_duration);
            }
        } else {
            tree.set(&key, value)?;
        }

        // Show progress at intervals
        if (i + 1) % PROGRESS_INTERVAL == 0 || i == TOTAL_RECORDS - 1 {
            let records_in_batch = i + 1 - last_batch_records;
            let batch_duration = last_batch_time.elapsed();
            let records_per_second = records_in_batch as f64 / batch_duration.as_secs_f64();

            insertion_times.push((i + 1, batch_duration));

            print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
                i + 1, TOTAL_RECORDS,
                (i + 1) as f64 / TOTAL_RECORDS as f64 * 100.0,
                records_per_second);
            io::stdout().flush().unwrap();

            last_batch_time = Instant::now();
            last_batch_records = i + 1;
        }
    }

    let total_duration = start_time.elapsed();
    println!("\n\nPerformance Summary:");
    println!("Total time to insert {} records: {:?}", TOTAL_RECORDS, total_duration);
    println!("Average insertion rate: {:.2} records/second",
        TOTAL_RECORDS as f64 / total_duration.as_secs_f64());

    // Show performance trend
    println!("\nPerformance Trend (records inserted vs. time per batch):");
    for (i, (record_count, duration)) in insertion_times.iter().enumerate() {
        if i % 10 == 0 || i == insertion_times.len() - 1 { // Only show every 10th point to avoid too much output
            println!(" After {} records: {:?} for {} records ({:.2} records/sec)",
                record_count,
                duration,
                PROGRESS_INTERVAL,
                PROGRESS_INTERVAL as f64 / duration.as_secs_f64());
        }
    }

    // Test access performance with distributed samples
    println!("\nTesting access performance with distributed samples...");
    let mut total_get_time = Duration::new(0, 0);
    let num_samples = 1000;

    // Use a simple distribution pattern instead of random
    for i in 0..num_samples {
        // Distribute samples across the entire range
        let sample_id = (i * (TOTAL_RECORDS / num_samples)) % TOTAL_RECORDS;
        let key = format!("key:{:08}", sample_id);

        let get_start = Instant::now();
        let _ = tree.get(&key)?;
        total_get_time += get_start.elapsed();
    }

    println!("Average time to retrieve a record: {:?}",
        total_get_time / num_samples as u32);

    // Test prefix search performance
    println!("\nTesting prefix search performance...");
    let prefixes = ["key:0", "key:1", "key:5", "key:9"];

    for prefix in &prefixes {
        let list_start = Instant::now();
        let keys = tree.list(prefix)?;
        let list_duration = list_start.elapsed();

        println!("Found {} keys with prefix '{}' in {:?}",
            keys.len(), prefix, list_duration);
    }

    // Clean up (optional)
    if std::env::var("KEEP_DB").is_err() {
        std::fs::remove_dir_all(&db_path)?;
        println!("\nCleaned up database directory");
    } else {
        println!("\nDatabase kept at: {}", db_path.display());
    }

    Ok(())
}

tst/examples/prefix_ops.rs (new file, 123 lines)

use tst::TST;
use std::time::Instant;
use std::io::{self, Write};

fn main() -> Result<(), tst::Error> {
    // Create a temporary directory for the database
    let db_path = std::env::temp_dir().join("tst_prefix_example");
    std::fs::create_dir_all(&db_path)?;

    println!("Creating ternary search tree at: {}", db_path.display());

    // Create a new TST
    let mut tree = TST::new(db_path.to_str().unwrap(), true)?;

    // Insert a variety of keys with different prefixes
    println!("Inserting data with various prefixes...");

    // Names
    let names = [
        "Alice", "Alexander", "Amanda", "Andrew", "Amy",
        "Bob", "Barbara", "Benjamin", "Brenda", "Brian",
        "Charlie", "Catherine", "Christopher", "Cynthia", "Carl",
        "David", "Diana", "Daniel", "Deborah", "Donald",
        "Edward", "Elizabeth", "Eric", "Emily", "Ethan"
    ];

    for (i, name) in names.iter().enumerate() {
        let value = format!("person-{}", i).into_bytes();
        tree.set(name, value)?;
    }

    // Cities
    let cities = [
        "New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
        "Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose",
        "Austin", "Jacksonville", "Fort Worth", "Columbus", "San Francisco",
        "Charlotte", "Indianapolis", "Seattle", "Denver", "Washington"
    ];

    for (i, city) in cities.iter().enumerate() {
        let value = format!("city-{}", i).into_bytes();
        tree.set(city, value)?;
    }

    // Countries
    let countries = [
        "United States", "Canada", "Mexico", "Brazil", "Argentina",
        "United Kingdom", "France", "Germany", "Italy", "Spain",
        "China", "Japan", "India", "Australia", "Russia"
    ];

    for (i, country) in countries.iter().enumerate() {
        let value = format!("country-{}", i).into_bytes();
        tree.set(country, value)?;
    }

    println!("Total items inserted: {}", names.len() + cities.len() + countries.len());

    // Test prefix operations
    test_prefix(&mut tree, "A")?;
    test_prefix(&mut tree, "B")?;
    test_prefix(&mut tree, "C")?;
    test_prefix(&mut tree, "San")?;
    test_prefix(&mut tree, "United")?;

    // Test non-existent prefix
    test_prefix(&mut tree, "Z")?;

    // Test empty prefix (should return all keys)
    println!("\nTesting empty prefix (should return all keys):");
    let start = Instant::now();
    let all_keys = tree.list("")?;
    let duration = start.elapsed();

    println!("Found {} keys with empty prefix in {:?}", all_keys.len(), duration);
    println!("First 5 keys (alphabetically):");
    for key in all_keys.iter().take(5) {
        println!(" {}", key);
    }

    // Clean up (optional)
    if std::env::var("KEEP_DB").is_err() {
        std::fs::remove_dir_all(&db_path)?;
        println!("\nCleaned up database directory");
    } else {
        println!("\nDatabase kept at: {}", db_path.display());
    }

    Ok(())
}

fn test_prefix(tree: &mut TST, prefix: &str) -> Result<(), tst::Error> {
    println!("\nTesting prefix '{}':", prefix);

    // Test list operation
    let start = Instant::now();
    let keys = tree.list(prefix)?;
    let list_duration = start.elapsed();

    println!("Found {} keys with prefix '{}' in {:?}", keys.len(), prefix, list_duration);

    if !keys.is_empty() {
        println!("Keys:");
        for key in &keys {
            println!(" {}", key);
        }

        // Test getall operation
        let start = Instant::now();
        let values = tree.getall(prefix)?;
        let getall_duration = start.elapsed();

        println!("Retrieved {} values in {:?}", values.len(), getall_duration);
        println!("First value: {}",
            if !values.is_empty() {
                String::from_utf8_lossy(&values[0])
            } else {
                "None".into()
            });
    }

    Ok(())
}
36 tst/src/error.rs Normal file
@@ -0,0 +1,36 @@
//! Error types for the TST module.

use thiserror::Error;
use std::io;

/// Error type for TST operations.
#[derive(Debug, Error)]
pub enum Error {
    /// Error from OurDB operations.
    #[error("OurDB error: {0}")]
    OurDB(#[from] ourdb::Error),

    /// Error when a key is not found.
    #[error("Key not found: {0}")]
    KeyNotFound(String),

    /// Error when a prefix is not found.
    #[error("Prefix not found: {0}")]
    PrefixNotFound(String),

    /// Error during serialization.
    #[error("Serialization error: {0}")]
    Serialization(String),

    /// Error during deserialization.
    #[error("Deserialization error: {0}")]
    Deserialization(String),

    /// Error for invalid operations.
    #[error("Invalid operation: {0}")]
    InvalidOperation(String),

    /// IO error.
    #[error("IO error: {0}")]
    IO(#[from] io::Error),
}
122 tst/src/lib.rs Normal file
@@ -0,0 +1,122 @@
//! TST is a space-optimized tree data structure that enables efficient string key operations
//! with persistent storage using OurDB as a backend.
//!
//! This implementation provides a persistent ternary search tree that can be used for efficient
//! string key operations, such as auto-complete, routing tables, and more.

mod error;
mod node;
mod operations;
mod serialize;

pub use error::Error;
pub use node::TSTNode;

use ourdb::OurDB;

/// TST represents a ternary search tree data structure with persistent storage.
pub struct TST {
    /// Database for persistent storage
    db: OurDB,

    /// Database ID of the root node
    root_id: Option<u32>,
}

impl TST {
    /// Creates a new TST with the specified database path.
    ///
    /// # Arguments
    ///
    /// * `path` - The path to the database directory
    /// * `reset` - Whether to reset the database if it exists
    ///
    /// # Returns
    ///
    /// A new `TST` instance
    ///
    /// # Errors
    ///
    /// Returns an error if the database cannot be created or opened
    pub fn new(path: &str, reset: bool) -> Result<Self, Error> {
        operations::new_tst(path, reset)
    }

    /// Sets a key-value pair in the tree.
    ///
    /// # Arguments
    ///
    /// * `key` - The key to set
    /// * `value` - The value to set
    ///
    /// # Errors
    ///
    /// Returns an error if the operation fails
    pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error> {
        operations::set(self, key, value)
    }

    /// Gets a value by key from the tree.
    ///
    /// # Arguments
    ///
    /// * `key` - The key to get
    ///
    /// # Returns
    ///
    /// The value associated with the key
    ///
    /// # Errors
    ///
    /// Returns an error if the key is not found or the operation fails
    pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error> {
        operations::get(self, key)
    }

    /// Deletes a key from the tree.
    ///
    /// # Arguments
    ///
    /// * `key` - The key to delete
    ///
    /// # Errors
    ///
    /// Returns an error if the key is not found or the operation fails
    pub fn delete(&mut self, key: &str) -> Result<(), Error> {
        operations::delete(self, key)
    }

    /// Lists all keys with a given prefix.
    ///
    /// # Arguments
    ///
    /// * `prefix` - The prefix to search for
    ///
    /// # Returns
    ///
    /// A list of keys that start with the given prefix
    ///
    /// # Errors
    ///
    /// Returns an error if the operation fails
    pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error> {
        operations::list(self, prefix)
    }

    /// Gets all values for keys with a given prefix.
    ///
    /// # Arguments
    ///
    /// * `prefix` - The prefix to search for
    ///
    /// # Returns
    ///
    /// A list of values for keys that start with the given prefix
    ///
    /// # Errors
    ///
    /// Returns an error if the operation fails
    pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
        operations::getall(self, prefix)
    }
}
49 tst/src/node.rs Normal file
@@ -0,0 +1,49 @@
//! Node types for the TST module.

/// Represents a node in the ternary search tree.
#[derive(Debug, Clone, PartialEq)]
pub struct TSTNode {
    /// The character stored at this node.
    pub character: char,

    /// Value stored at this node (empty if not end of key).
    pub value: Vec<u8>,

    /// Whether this node represents the end of a key.
    pub is_end_of_key: bool,

    /// Reference to the left child node (for characters < current character).
    pub left_id: Option<u32>,

    /// Reference to the middle child node (for next character in key).
    pub middle_id: Option<u32>,

    /// Reference to the right child node (for characters > current character).
    pub right_id: Option<u32>,
}

impl TSTNode {
    /// Creates a new node.
    pub fn new(character: char, value: Vec<u8>, is_end_of_key: bool) -> Self {
        Self {
            character,
            value,
            is_end_of_key,
            left_id: None,
            middle_id: None,
            right_id: None,
        }
    }

    /// Creates a new root node.
    pub fn new_root() -> Self {
        Self {
            character: '\0', // Use null character for root
            value: Vec::new(),
            is_end_of_key: false,
            left_id: None,
            middle_id: None,
            right_id: None,
        }
    }
}
418 tst/src/operations.rs Normal file
@@ -0,0 +1,418 @@
//! Implementation of TST operations.

use crate::error::Error;
use crate::node::TSTNode;
use crate::TST;
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;

/// Creates a new TST with the specified database path.
pub fn new_tst(path: &str, reset: bool) -> Result<TST, Error> {
    // If the path exists and reset is true, remove it first
    let path_buf = PathBuf::from(path);
    if path_buf.exists() && reset {
        std::fs::remove_dir_all(&path_buf)?;
    }

    // Create the directory if it doesn't exist
    std::fs::create_dir_all(&path_buf)?;

    let config = OurDBConfig {
        path: path_buf,
        incremental_mode: true,
        file_size: Some(1024 * 1024), // 1MB file size for better performance with large datasets
        keysize: Some(4),             // Use keysize=4 (default)
    };

    let mut db = OurDB::new(config)?;

    let root_id = if db.get_next_id()? == 1 || reset {
        // Create a new root node
        let root = TSTNode::new_root();
        let root_id = db.set(OurDBSetArgs {
            id: None,
            data: &root.serialize(),
        })?;

        Some(root_id)
    } else {
        // Use existing root node
        Some(1) // Root node always has ID 1
    };

    Ok(TST {
        db,
        root_id,
    })
}
/// Sets a key-value pair in the tree.
pub fn set(tree: &mut TST, key: &str, value: Vec<u8>) -> Result<(), Error> {
    if key.is_empty() {
        return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
    }

    let root_id = match tree.root_id {
        Some(id) => id,
        None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
    };

    let chars: Vec<char> = key.chars().collect();
    set_recursive(tree, root_id, &chars, 0, value)?;

    Ok(())
}

/// Recursive helper function for setting a key-value pair.
fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value: Vec<u8>) -> Result<u32, Error> {
    let mut node = tree.get_node(node_id)?;

    if pos >= chars.len() {
        // We've reached the end of the key
        node.is_end_of_key = true;
        node.value = value;
        return tree.save_node(Some(node_id), &node);
    }

    let current_char = chars[pos];

    if node.character == '\0' {
        // Root node or empty node, set the character
        node.character = current_char;
        let node_id = tree.save_node(Some(node_id), &node)?;

        // Continue with the next character
        if pos + 1 < chars.len() {
            let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
            let new_id = tree.save_node(None, &new_node)?;

            let mut updated_node = tree.get_node(node_id)?;
            updated_node.middle_id = Some(new_id);
            tree.save_node(Some(node_id), &updated_node)?;

            return set_recursive(tree, new_id, chars, pos + 1, value);
        } else {
            // This is the last character
            let mut updated_node = tree.get_node(node_id)?;
            updated_node.is_end_of_key = true;
            updated_node.value = value;
            return tree.save_node(Some(node_id), &updated_node);
        }
    }

    if current_char < node.character {
        // Go left
        if let Some(left_id) = node.left_id {
            return set_recursive(tree, left_id, chars, pos, value);
        } else {
            // Create new left node
            let new_node = TSTNode::new(current_char, Vec::new(), false);
            let new_id = tree.save_node(None, &new_node)?;

            // Update current node
            node.left_id = Some(new_id);
            tree.save_node(Some(node_id), &node)?;

            return set_recursive(tree, new_id, chars, pos, value);
        }
    } else if current_char > node.character {
        // Go right
        if let Some(right_id) = node.right_id {
            return set_recursive(tree, right_id, chars, pos, value);
        } else {
            // Create new right node
            let new_node = TSTNode::new(current_char, Vec::new(), false);
            let new_id = tree.save_node(None, &new_node)?;

            // Update current node
            node.right_id = Some(new_id);
            tree.save_node(Some(node_id), &node)?;

            return set_recursive(tree, new_id, chars, pos, value);
        }
    } else {
        // Character matches, go middle (next character)
        if pos + 1 >= chars.len() {
            // This is the last character
            node.is_end_of_key = true;
            node.value = value;
            return tree.save_node(Some(node_id), &node);
        }

        if let Some(middle_id) = node.middle_id {
            return set_recursive(tree, middle_id, chars, pos + 1, value);
        } else {
            // Create new middle node
            let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
            let new_id = tree.save_node(None, &new_node)?;

            // Update current node
            node.middle_id = Some(new_id);
            tree.save_node(Some(node_id), &node)?;

            return set_recursive(tree, new_id, chars, pos + 1, value);
        }
    }
}

/// Gets a value by key from the tree.
pub fn get(tree: &mut TST, key: &str) -> Result<Vec<u8>, Error> {
    if key.is_empty() {
        return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
    }

    let root_id = match tree.root_id {
        Some(id) => id,
        None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
    };

    let chars: Vec<char> = key.chars().collect();
    let node_id = find_node(tree, root_id, &chars, 0)?;

    let node = tree.get_node(node_id)?;
    if node.is_end_of_key {
        Ok(node.value.clone())
    } else {
        Err(Error::KeyNotFound(key.to_string()))
    }
}

/// Finds a node by key.
fn find_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
    let node = tree.get_node(node_id)?;

    if pos >= chars.len() {
        return Ok(node_id);
    }

    let current_char = chars[pos];

    if current_char < node.character {
        // Go left
        if let Some(left_id) = node.left_id {
            find_node(tree, left_id, chars, pos)
        } else {
            Err(Error::KeyNotFound(chars.iter().collect()))
        }
    } else if current_char > node.character {
        // Go right
        if let Some(right_id) = node.right_id {
            find_node(tree, right_id, chars, pos)
        } else {
            Err(Error::KeyNotFound(chars.iter().collect()))
        }
    } else {
        // Character matches
        if pos + 1 >= chars.len() {
            // This is the last character
            Ok(node_id)
        } else if let Some(middle_id) = node.middle_id {
            // Go to next character
            find_node(tree, middle_id, chars, pos + 1)
        } else {
            Err(Error::KeyNotFound(chars.iter().collect()))
        }
    }
}

/// Deletes a key from the tree.
pub fn delete(tree: &mut TST, key: &str) -> Result<(), Error> {
    if key.is_empty() {
        return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
    }

    let root_id = match tree.root_id {
        Some(id) => id,
        None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
    };

    let chars: Vec<char> = key.chars().collect();
    let node_id = find_node(tree, root_id, &chars, 0)?;

    let mut node = tree.get_node(node_id)?;

    if !node.is_end_of_key {
        return Err(Error::KeyNotFound(key.to_string()));
    }

    // If the node has any children, just mark it as not end of key
    if node.middle_id.is_some() || node.left_id.is_some() || node.right_id.is_some() {
        node.is_end_of_key = false;
        node.value = Vec::new();
        tree.save_node(Some(node_id), &node)?;
        return Ok(());
    }

    // Otherwise, we need to remove the node and update its parent
    // This is more complex and would require tracking the path to the node
    // For simplicity, we'll just mark it as not end of key for now
    node.is_end_of_key = false;
    node.value = Vec::new();
    tree.save_node(Some(node_id), &node)?;

    Ok(())
}

/// Lists all keys with a given prefix.
pub fn list(tree: &mut TST, prefix: &str) -> Result<Vec<String>, Error> {
    let root_id = match tree.root_id {
        Some(id) => id,
        None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
    };

    let mut result = Vec::new();

    // Handle empty prefix case - will return all keys
    if prefix.is_empty() {
        collect_all_keys(tree, root_id, String::new(), &mut result)?;
        return Ok(result);
    }

    // Find the node corresponding to the prefix
    let chars: Vec<char> = prefix.chars().collect();
    let node_id = match find_prefix_node(tree, root_id, &chars, 0) {
        Ok(id) => id,
        Err(_) => return Ok(Vec::new()), // Prefix not found, return empty list
    };

    // Collect all keys from the subtree
    collect_keys_with_prefix(tree, node_id, prefix.to_string(), &mut result)?;

    Ok(result)
}

/// Finds the node corresponding to a prefix.
fn find_prefix_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
    if pos >= chars.len() {
        return Ok(node_id);
    }

    let node = tree.get_node(node_id)?;
    let current_char = chars[pos];

    if current_char < node.character {
        // Go left
        if let Some(left_id) = node.left_id {
            find_prefix_node(tree, left_id, chars, pos)
        } else {
            Err(Error::PrefixNotFound(chars.iter().collect()))
        }
    } else if current_char > node.character {
        // Go right
        if let Some(right_id) = node.right_id {
            find_prefix_node(tree, right_id, chars, pos)
        } else {
            Err(Error::PrefixNotFound(chars.iter().collect()))
        }
    } else {
        // Character matches
        if pos + 1 >= chars.len() {
            // This is the last character of the prefix
            Ok(node_id)
        } else if let Some(middle_id) = node.middle_id {
            // Go to next character
            find_prefix_node(tree, middle_id, chars, pos + 1)
        } else {
            Err(Error::PrefixNotFound(chars.iter().collect()))
        }
    }
}
/// Collects all keys with a given prefix.
fn collect_keys_with_prefix(
    tree: &mut TST,
    node_id: u32,
    current_path: String,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;

    // If this node is an end of key, the prefix itself is a stored key
    if node.is_end_of_key {
        result.push(current_path.clone());
    }

    // Only the middle subtree extends the prefix: `current_path` already ends with
    // this node's character, and keys in the left/right subtrees differ from the
    // prefix at this position, so they must not be collected here.
    if let Some(middle_id) = node.middle_id {
        collect_all_keys(tree, middle_id, current_path, result)?;
    }

    Ok(())
}

/// Recursively collects all keys under a node.
fn collect_all_keys(
    tree: &mut TST,
    node_id: u32,
    current_path: String,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;

    let mut new_path = current_path.clone();
    new_path.push(node.character);

    // If this node is an end of key, add it to the result
    if node.is_end_of_key {
        result.push(new_path.clone());
    }

    // Recursively collect keys from all children
    if let Some(left_id) = node.left_id {
        collect_all_keys(tree, left_id, current_path.clone(), result)?;
    }

    if let Some(middle_id) = node.middle_id {
        collect_all_keys(tree, middle_id, new_path.clone(), result)?;
    }

    if let Some(right_id) = node.right_id {
        collect_all_keys(tree, right_id, current_path.clone(), result)?;
    }

    Ok(())
}

/// Gets all values for keys with a given prefix.
pub fn getall(tree: &mut TST, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
    // Get all matching keys
    let keys = list(tree, prefix)?;

    // Get values for each key
    let mut values = Vec::new();
    for key in keys {
        if let Ok(value) = get(tree, &key) {
            values.push(value);
        }
    }

    Ok(values)
}

impl TST {
    /// Helper function to get a node from the database.
    pub(crate) fn get_node(&mut self, node_id: u32) -> Result<TSTNode, Error> {
        let data = self.db.get(node_id)?;
        TSTNode::deserialize(&data)
    }

    /// Helper function to save a node to the database.
    pub(crate) fn save_node(&mut self, node_id: Option<u32>, node: &TSTNode) -> Result<u32, Error> {
        let data = node.serialize();
        let args = OurDBSetArgs {
            id: node_id,
            data: &data,
        };
        Ok(self.db.set(args)?)
    }
}
134 tst/src/serialize.rs Normal file
@@ -0,0 +1,134 @@
//! Serialization and deserialization for TST nodes.

use crate::error::Error;
use crate::node::TSTNode;

/// Current binary format version.
const VERSION: u8 = 1;

impl TSTNode {
    /// Serializes a node to bytes for storage.
    pub fn serialize(&self) -> Vec<u8> {
        let mut buffer = Vec::new();

        // Version
        buffer.push(VERSION);

        // Character (as UTF-32)
        let char_bytes = (self.character as u32).to_le_bytes();
        buffer.extend_from_slice(&char_bytes);

        // Is end of key
        buffer.push(if self.is_end_of_key { 1 } else { 0 });

        // Value (only if is_end_of_key)
        if self.is_end_of_key {
            let value_len = (self.value.len() as u32).to_le_bytes();
            buffer.extend_from_slice(&value_len);
            buffer.extend_from_slice(&self.value);
        } else {
            // Zero length
            buffer.extend_from_slice(&[0, 0, 0, 0]);
        }

        // Child pointers
        let left_id = self.left_id.unwrap_or(0).to_le_bytes();
        buffer.extend_from_slice(&left_id);

        let middle_id = self.middle_id.unwrap_or(0).to_le_bytes();
        buffer.extend_from_slice(&middle_id);

        let right_id = self.right_id.unwrap_or(0).to_le_bytes();
        buffer.extend_from_slice(&right_id);

        buffer
    }

    /// Deserializes bytes to a node.
    pub fn deserialize(data: &[u8]) -> Result<Self, Error> {
        if data.len() < 22 { // Minimum size: version + char + is_end + value_len + 3 child IDs
            return Err(Error::Deserialization("Data too short".to_string()));
        }

        let mut pos = 0;

        // Version
        let version = data[pos];
        pos += 1;

        if version != VERSION {
            return Err(Error::Deserialization(format!("Unsupported version: {}", version)));
        }

        // Character
        let char_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
        let char_code = u32::from_le_bytes(char_bytes);
        let character = char::from_u32(char_code)
            .ok_or_else(|| Error::Deserialization("Invalid character".to_string()))?;
        pos += 4;

        // Is end of key
        let is_end_of_key = data[pos] != 0;
        pos += 1;

        // Value length
        let value_len_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
        let value_len = u32::from_le_bytes(value_len_bytes) as usize;
        pos += 4;

        // Value
        let value = if value_len > 0 {
            if pos + value_len > data.len() {
                return Err(Error::Deserialization("Value length exceeds data".to_string()));
            }
            data[pos..pos+value_len].to_vec()
        } else {
            Vec::new()
        };
        pos += value_len;

        // Child pointers
        if pos + 12 > data.len() {
            return Err(Error::Deserialization("Data too short for child pointers".to_string()));
        }

        let left_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
        let left_id = u32::from_le_bytes(left_id_bytes);
        pos += 4;

        let middle_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
        let middle_id = u32::from_le_bytes(middle_id_bytes);
        pos += 4;

        let right_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
        let right_id = u32::from_le_bytes(right_id_bytes);

        Ok(TSTNode {
            character,
            value,
            is_end_of_key,
            left_id: if left_id == 0 { None } else { Some(left_id) },
            middle_id: if middle_id == 0 { None } else { Some(middle_id) },
            right_id: if right_id == 0 { None } else { Some(right_id) },
        })
    }
}

/// Gets the common prefix of two strings.
pub fn get_common_prefix(a: &str, b: &str) -> String {
    let mut result = String::new();
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();

    let min_len = a_chars.len().min(b_chars.len());

    for i in 0..min_len {
        if a_chars[i] == b_chars[i] {
            result.push(a_chars[i]);
        } else {
            break;
        }
    }

    result
}
215 tst/tests/basic_test.rs Normal file
@@ -0,0 +1,215 @@
use tst::TST;
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;

fn get_test_db_path() -> String {
    let timestamp = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .unwrap()
        .as_secs();

    let path = temp_dir().join(format!("tst_test_{}", timestamp));
    fs::create_dir_all(&path).unwrap();

    path.to_string_lossy().to_string()
}

fn cleanup_test_db(path: &str) {
    let _ = fs::remove_dir_all(path);
}

#[test]
fn test_create_tst() {
    let path = get_test_db_path();

    let result = TST::new(&path, true);
    assert!(result.is_ok());

    cleanup_test_db(&path);
}

#[test]
fn test_set_and_get() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Test setting and getting a key
    let key = "test_key";
    let value = b"test_value".to_vec();

    let set_result = tree.set(key, value.clone());
    assert!(set_result.is_ok());

    let get_result = tree.get(key);
    assert!(get_result.is_ok());
    assert_eq!(get_result.unwrap(), value);

    cleanup_test_db(&path);
}

#[test]
fn test_get_nonexistent_key() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Test getting a key that doesn't exist
    let get_result = tree.get("nonexistent_key");
    assert!(get_result.is_err());

    cleanup_test_db(&path);
}

#[test]
fn test_delete() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Set a key
    let key = "delete_test";
    let value = b"to_be_deleted".to_vec();

    tree.set(key, value).unwrap();

    // Verify it exists
    let get_result = tree.get(key);
    assert!(get_result.is_ok());

    // Delete it
    let delete_result = tree.delete(key);
    assert!(delete_result.is_ok());

    // Verify it's gone
    let get_after_delete = tree.get(key);
    assert!(get_after_delete.is_err());

    cleanup_test_db(&path);
}

#[test]
fn test_multiple_keys() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert multiple keys
    let keys = ["apple", "banana", "cherry", "date", "elderberry"];

    for (i, key) in keys.iter().enumerate() {
        let value = format!("value_{}", i).into_bytes();
        tree.set(key, value).unwrap();
    }

    // Verify all keys exist
    for (i, key) in keys.iter().enumerate() {
        let expected_value = format!("value_{}", i).into_bytes();
        let get_result = tree.get(key).unwrap();
        assert_eq!(get_result, expected_value);
    }

    cleanup_test_db(&path);
}

#[test]
fn test_list_prefix() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert keys with common prefixes
    let keys = [
        "apple", "application", "append",
        "banana", "bandana",
        "cherry", "chocolate"
    ];

    for key in &keys {
        tree.set(key, key.as_bytes().to_vec()).unwrap();
    }

    // Test prefix "app"
    let app_keys = tree.list("app").unwrap();
    assert_eq!(app_keys.len(), 3);
    assert!(app_keys.contains(&"apple".to_string()));
    assert!(app_keys.contains(&"application".to_string()));
    assert!(app_keys.contains(&"append".to_string()));

    // Test prefix "ban"
    let ban_keys = tree.list("ban").unwrap();
    assert_eq!(ban_keys.len(), 2);
    assert!(ban_keys.contains(&"banana".to_string()));
    assert!(ban_keys.contains(&"bandana".to_string()));

    // Test prefix "c"
    let c_keys = tree.list("c").unwrap();
    assert_eq!(c_keys.len(), 2);
    assert!(c_keys.contains(&"cherry".to_string()));
    assert!(c_keys.contains(&"chocolate".to_string()));

    // Test non-existent prefix
    let z_keys = tree.list("z").unwrap();
    assert_eq!(z_keys.len(), 0);

    cleanup_test_db(&path);
}

#[test]
fn test_getall_prefix() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert keys with common prefixes
    let keys = [
        "apple", "application", "append",
        "banana", "bandana",
        "cherry", "chocolate"
    ];

    for key in &keys {
        tree.set(key, key.as_bytes().to_vec()).unwrap();
    }

    // Test getall with prefix "app"
    let app_values = tree.getall("app").unwrap();
    assert_eq!(app_values.len(), 3);

    // Convert values to strings for easier comparison
    let app_value_strings: Vec<String> = app_values
        .iter()
        .map(|v| String::from_utf8_lossy(v).to_string())
        .collect();

    assert!(app_value_strings.contains(&"apple".to_string()));
    assert!(app_value_strings.contains(&"application".to_string()));
    assert!(app_value_strings.contains(&"append".to_string()));

    cleanup_test_db(&path);
}

#[test]
fn test_empty_prefix() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert some keys
    let keys = ["apple", "banana", "cherry"];

    for key in &keys {
        tree.set(key, key.as_bytes().to_vec()).unwrap();
    }

    // Test list with empty prefix (should return all keys)
    let all_keys = tree.list("").unwrap();
    assert_eq!(all_keys.len(), keys.len());

    for key in &keys {
        assert!(all_keys.contains(&key.to_string()));
    }

    cleanup_test_db(&path);
}
215 tst/tests/prefix_test.rs Normal file
@@ -0,0 +1,215 @@
use tst::TST;
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;

fn get_test_db_path() -> String {
    let timestamp = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .unwrap()
        .as_secs();

    let path = temp_dir().join(format!("tst_prefix_test_{}", timestamp));
    fs::create_dir_all(&path).unwrap();

    path.to_string_lossy().to_string()
}

fn cleanup_test_db(path: &str) {
    let _ = fs::remove_dir_all(path);
}

#[test]
fn test_prefix_with_common_prefixes() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert keys with common prefixes
    let test_data = [
        ("test", b"value1".to_vec()),
        ("testing", b"value2".to_vec()),
        ("tested", b"value3".to_vec()),
        ("tests", b"value4".to_vec()),
        ("tester", b"value5".to_vec()),
    ];

    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }

    // Test prefix "test"
    let keys = tree.list("test").unwrap();
    assert_eq!(keys.len(), 5);

    for (key, _) in &test_data {
        assert!(keys.contains(&key.to_string()));
    }

    // Test prefix "teste"
    let keys = tree.list("teste").unwrap();
    assert_eq!(keys.len(), 2);
    assert!(keys.contains(&"tested".to_string()));
    assert!(keys.contains(&"tester".to_string()));

    cleanup_test_db(&path);
}

#[test]
fn test_prefix_with_different_prefixes() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert keys with different prefixes
    let test_data = [
        ("apple", b"fruit1".to_vec()),
        ("banana", b"fruit2".to_vec()),
        ("cherry", b"fruit3".to_vec()),
        ("date", b"fruit4".to_vec()),
        ("elderberry", b"fruit5".to_vec()),
    ];

    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }

    // Test each prefix
    for (key, _) in &test_data {
        let prefix = &key[0..1]; // First character
        let keys = tree.list(prefix).unwrap();
        assert!(keys.contains(&key.to_string()));
    }

    // Test non-existent prefix
    let keys = tree.list("z").unwrap();
    assert_eq!(keys.len(), 0);

    cleanup_test_db(&path);
}

#[test]
fn test_prefix_with_empty_string() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert some keys
    let test_data = [
        ("apple", b"fruit1".to_vec()),
        ("banana", b"fruit2".to_vec()),
        ("cherry", b"fruit3".to_vec()),
    ];

    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }

    // Test empty prefix (should return all keys)
    let keys = tree.list("").unwrap();
    assert_eq!(keys.len(), test_data.len());

    for (key, _) in &test_data {
        assert!(keys.contains(&key.to_string()));
    }

    cleanup_test_db(&path);
}

#[test]
fn test_getall_with_prefix() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert keys with common prefixes
    let test_data = [
        ("test", b"value1".to_vec()),
        ("testing", b"value2".to_vec()),
        ("tested", b"value3".to_vec()),
        ("tests", b"value4".to_vec()),
        ("tester", b"value5".to_vec()),
    ];

    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }

    // Test getall with prefix "test"
    let values = tree.getall("test").unwrap();
    assert_eq!(values.len(), 5);

    for (_, value) in &test_data {
        assert!(values.contains(value));
    }

    cleanup_test_db(&path);
}

#[test]
fn test_prefix_with_unicode_characters() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert keys with Unicode characters
    let test_data = [
        ("café", b"coffee".to_vec()),
        ("cafétéria", b"cafeteria".to_vec()),
        ("caffè", b"italian coffee".to_vec()),
        ("café au lait", b"coffee with milk".to_vec()),
    ];

    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }

    // Test prefix "café"
    let keys = tree.list("café").unwrap();
    assert_eq!(keys.len(), 2);
    assert!(keys.contains(&"café".to_string()));
    assert!(keys.contains(&"café au lait".to_string()));

    // Test prefix "caf"
    let keys = tree.list("caf").unwrap();
    assert_eq!(keys.len(), 4);

    for (key, _) in &test_data {
        assert!(keys.contains(&key.to_string()));
    }

    cleanup_test_db(&path);
}

#[test]
fn test_prefix_with_long_keys() {
    let path = get_test_db_path();

    let mut tree = TST::new(&path, true).unwrap();

    // Insert long keys
    let test_data = [
        ("this_is_a_very_long_key_for_testing_purposes_1", b"value1".to_vec()),
        ("this_is_a_very_long_key_for_testing_purposes_2", b"value2".to_vec()),
        ("this_is_a_very_long_key_for_testing_purposes_3", b"value3".to_vec()),
        ("this_is_another_long_key_for_testing", b"value4".to_vec()),
    ];

    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }

    // Test prefix "this_is_a_very"
    let keys = tree.list("this_is_a_very").unwrap();
    assert_eq!(keys.len(), 3);

    // Test prefix "this_is"
    let keys = tree.list("this_is").unwrap();
    assert_eq!(keys.len(), 4);

    for (key, _) in &test_data {
        assert!(keys.contains(&key.to_string()));
    }

    cleanup_test_db(&path);
}
365 tst_implementation_plan.md Normal file
@@ -0,0 +1,365 @@
# Ternary Search Tree (TST) Implementation Plan

## 1. Overview

A Ternary Search Tree (TST) is a type of trie where each node has three children: left, middle, and right. Unlike a RadixTree which compresses common prefixes, a TST stores one character per node and uses a binary search tree-like structure for efficient traversal.

```mermaid
graph TD
    A[Root Node 'r'] --> B[Left Child 'a']
    A --> C[Middle Child 'o']
    A --> D[Right Child 't']
    C --> E[Middle Child 'o']
    E --> F[Middle Child 'm' - End of Key]
    E --> G[Middle Child 't' - End of Key]
```

The TST implementation will use OurDB as the backend for persistent storage, similar to the existing RadixTree implementation. The goal is to provide a more balanced tree structure that offers consistent performance across all operations (set, get, delete, list).

## 2. Core Data Structures

### 2.1 TST Node Structure

```rust
pub struct TSTNode {
    // The character stored at this node
    pub character: char,

    // Value stored at this node (empty if not end of key)
    pub value: Vec<u8>,

    // Whether this node represents the end of a key
    pub is_end_of_key: bool,

    // References to child nodes
    pub left_id: Option<u32>,   // For characters < current character
    pub middle_id: Option<u32>, // For characters == current character (next character in key)
    pub right_id: Option<u32>,  // For characters > current character
}
```

### 2.2 TST Structure

```rust
pub struct TST {
    // Database for persistent storage
    db: OurDB,

    // Database ID of the root node
    root_id: Option<u32>,
}
```

## 3. API Design

The TST will maintain similar core functionality to RadixTree but with an API that better suits its structure:

```rust
impl TST {
    // Creates a new TST with the specified database path
    pub fn new(path: &str, reset: bool) -> Result<Self, Error>;

    // Sets a key-value pair in the tree
    pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error>;

    // Gets a value by key from the tree
    pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error>;

    // Deletes a key from the tree
    pub fn delete(&mut self, key: &str) -> Result<(), Error>;

    // Lists all keys with a given prefix
    pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error>;

    // Gets all values for keys with a given prefix
    pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error>;
}
```

## 4. Implementation Strategy

### 4.1 Phase 1: Core Data Structures and Serialization

```mermaid
graph TD
    A[Define TSTNode and TST structs] --> B[Implement serialization/deserialization]
    B --> C[Implement Error handling]
    C --> D[Implement OurDB integration]
```

1. Define the `TSTNode` and `TST` structs
2. Implement serialization and deserialization for `TSTNode`
3. Define error types for TST-specific errors
4. Implement OurDB integration for node storage and retrieval

### 4.2 Phase 2: Basic Tree Operations

```mermaid
graph TD
    A[Implement new] --> B[Implement set]
    B --> C[Implement get]
    C --> D[Implement helper functions]
```

1. Implement the `new()` function for creating a new TST
2. Implement the `set()` function for inserting key-value pairs
3. Implement the `get()` function for retrieving values
4. Implement helper functions for node traversal and manipulation

### 4.3 Phase 3: Advanced Tree Operations

```mermaid
graph TD
    A[Implement delete] --> B[Implement list]
    B --> C[Implement getall]
    C --> D[Optimize operations]
```

1. Implement the `delete()` function for removing keys
2. Implement the `list()` function for prefix-based key listing
3. Implement the `getall()` function for retrieving all values with a prefix
4. Optimize operations for balanced performance

### 4.4 Phase 4: Testing and Performance Evaluation

```mermaid
graph TD
    A[Create unit tests] --> B[Create integration tests]
    B --> C[Create performance tests]
    C --> D[Compare with RadixTree]
    D --> E[Optimize based on results]
```

1. Create unit tests for each component
2. Create integration tests for the complete system
3. Create performance tests similar to RadixTree's
4. Compare performance with RadixTree
5. Optimize based on performance results

## 5. Implementation Details

### 5.1 Node Structure and Storage

Each TST node will store a single character and have three child pointers (left, middle, right). The nodes will be serialized and stored in OurDB, with node references using OurDB record IDs.

### 5.2 Key Operations

#### 5.2.1 Insertion (set)

```mermaid
graph TD
    A[Start at root] --> B{Root exists?}
    B -- No --> C[Create root node]
    B -- Yes --> D[Compare current char with node char]
    D -- Less than --> E[Go to left child]
    D -- Equal to --> F[Go to middle child]
    D -- Greater than --> G[Go to right child]
    E --> H{Child exists?}
    F --> H
    G --> H
    H -- No --> I[Create new node]
    H -- Yes --> J[Continue with next char]
    I --> J
    J --> K{End of key?}
    K -- Yes --> L[Set value and mark as end of key]
    K -- No --> D
```

1. Start at the root node
2. For each character in the key:
   - If the character is less than the current node's character, go to the left child
   - If the character is equal to the current node's character, go to the middle child
   - If the character is greater than the current node's character, go to the right child
   - If the child doesn't exist, create a new node
3. When the end of the key is reached, set the value and mark the node as end of key
#### 5.2.2 Lookup (get)

1. Start at the root node
2. For each character in the key:
   - If the character is less than the current node's character, go to the left child
   - If the character is equal to the current node's character, go to the middle child
   - If the character is greater than the current node's character, go to the right child
   - If the child doesn't exist, the key is not found
3. When the end of the key is reached, check if the node is marked as end of key
   - If yes, return the value
   - If no, the key is not found
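As a sanity check of the steps above, here is a minimal, self-contained lookup sketch. It uses an in-memory `SketchNode` arena where indices stand in for the OurDB record IDs used by the real `operations.rs`; the type and function names are illustration devices, not part of the planned API.

```rust
struct SketchNode {
    character: char,
    is_end_of_key: bool,
    value: Vec<u8>,
    left: Option<usize>,
    middle: Option<usize>,
    right: Option<usize>,
}

// Iterative lookup: follow left/right on a mismatch (same key position),
// follow middle on a match (advance to the next character).
fn lookup<'a>(arena: &'a [SketchNode], root: usize, key: &str) -> Option<&'a [u8]> {
    let chars: Vec<char> = key.chars().collect();
    let mut node = &arena[root];
    let mut pos = 0;
    loop {
        let ch = *chars.get(pos)?; // empty key: nothing to find
        if ch < node.character {
            node = &arena[node.left?];      // smaller character, same position
        } else if ch > node.character {
            node = &arena[node.right?];     // larger character, same position
        } else if pos + 1 == chars.len() {
            // Last character matched: the key exists only if this node ends a key.
            return node.is_end_of_key.then(|| node.value.as_slice());
        } else {
            node = &arena[node.middle?];    // matched: move on to the next character
            pos += 1;
        }
    }
}
```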
#### 5.2.3 Deletion (delete)

1. Find the node corresponding to the end of the key
2. If the node has no children, remove it and update its parent
3. If the node has children, mark it as not end of key and clear its value
4. Recursively clean up any nodes that are no longer needed

#### 5.2.4 Prefix Operations (list, getall)

1. Find the node corresponding to the end of the prefix
2. Perform a traversal of the subtree rooted at that node
3. Collect all keys (for list) or values (for getall) from nodes marked as end of key
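One subtlety in step 2: only the *middle* subtree of the prefix node extends the prefix, because keys in its left and right subtrees differ from the prefix at the last prefix character. The sketch below illustrates the collection step over the same hypothetical `SketchNode` arena as before (an assumption of the sketch, not the OurDB-backed storage).

```rust
struct SketchNode {
    character: char,
    is_end_of_key: bool,
    left: Option<usize>,
    middle: Option<usize>,
    right: Option<usize>,
}

// Collect every key under `node` into `out`. `path` holds the characters on the
// way down, excluding this node's character (left/right siblings share that path).
fn collect(arena: &[SketchNode], node: usize, path: &mut String, out: &mut Vec<String>) {
    let n = &arena[node];
    path.push(n.character);
    if n.is_end_of_key {
        out.push(path.clone());
    }
    if let Some(m) = n.middle {
        collect(arena, m, path, out); // middle child extends the current path
    }
    path.pop();
    // Left/right children are alternative characters at the same position,
    // so they are visited with this node's character removed from the path.
    if let Some(l) = n.left {
        collect(arena, l, path, out);
    }
    if let Some(r) = n.right {
        collect(arena, r, path, out);
    }
}

// For list(prefix): report the prefix itself if its node ends a key,
// then collect only the middle subtree of the prefix node.
fn list_prefix(arena: &[SketchNode], prefix_node: usize, prefix: &str, out: &mut Vec<String>) {
    let n = &arena[prefix_node];
    if n.is_end_of_key {
        out.push(prefix.to_string());
    }
    if let Some(m) = n.middle {
        let mut path = prefix.to_string();
        collect(arena, m, &mut path, out);
    }
}
```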
### 5.3 Serialization and OurDB Integration

#### 5.3.1 Node Structure for Serialization

Each TSTNode will be serialized with the following logical structure:

1. Version marker (for future format evolution)
2. Character data
3. Is-end-of-key flag
4. Value (if is-end-of-key is true)
5. Child node references (left, middle, right)
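Concretely, the byte layout implemented in `serialize.rs` above looks like this (all multi-byte fields little-endian, a child ID of 0 meaning "no child"):

```
offset  size  field
0       1     format version (currently 1)
1       4     character, encoded as a u32 code point
5       1     is_end_of_key flag (0 or 1)
6       4     value length in bytes (0 when not end of key)
10      n     value bytes
10+n    4     left child record ID   (0 = none)
14+n    4     middle child record ID (0 = none)
18+n    4     right child record ID  (0 = none)

Minimum record size: 22 bytes (empty value).
```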
#### 5.3.2 OurDB Integration

The TST will use OurDB for node storage and retrieval:

1. **Node Storage**: Each node will be serialized and stored as a record in OurDB.

```rust
fn save_node(&mut self, node_id: Option<u32>, node: &TSTNode) -> Result<u32, Error> {
    let data = node.serialize();
    let args = OurDBSetArgs {
        id: node_id,
        data: &data,
    };
    Ok(self.db.set(args)?)
}
```

2. **Node Retrieval**: Nodes will be retrieved from OurDB and deserialized.

```rust
fn get_node(&mut self, node_id: u32) -> Result<TSTNode, Error> {
    let data = self.db.get(node_id)?;
    TSTNode::deserialize(&data)
}
```

3. **Root Node Management**: The TST will maintain a root node ID for traversal.

#### 5.3.3 Handling Large Datasets

For large datasets, we'll implement a batching approach similar to the RadixTree's large-scale tests:

1. **Batch Processing**: Process large datasets in manageable batches to avoid OurDB size limitations.
2. **Database Partitioning**: Create separate database instances for very large datasets.
3. **Memory Management**: Implement efficient memory usage patterns to avoid excessive memory consumption.
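A minimal sketch of the batch-processing idea from point 1, using the public `TST` API defined above. The batch size and the progress report are illustrative choices, not values fixed by this plan:

```rust
use tst::TST;

/// Insert `total` generated records in fixed-size batches so progress can be
/// reported (and, if needed, a new database instance rotated in) between batches.
fn bulk_insert(tree: &mut TST, total: usize) -> Result<(), tst::Error> {
    const BATCH_SIZE: usize = 10_000; // illustrative value

    for batch_start in (0..total).step_by(BATCH_SIZE) {
        let batch_end = (batch_start + BATCH_SIZE).min(total);
        for i in batch_start..batch_end {
            let key = format!("key:{:08}", i);
            let value = format!("val{}", i).into_bytes();
            tree.set(&key, value)?;
        }
        println!("inserted {}/{} records", batch_end, total);
    }
    Ok(())
}
```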
## 6. Project Structure

```
tst/
├── Cargo.toml
├── src/
│   ├── lib.rs         # Public API and re-exports
│   ├── node.rs        # TSTNode implementation
│   ├── serialize.rs   # Serialization and deserialization
│   ├── error.rs       # Error types
│   └── operations.rs  # Tree operations implementation
├── tests/
│   ├── basic_test.rs  # Basic operations tests
│   ├── prefix_test.rs # Prefix operations tests
│   └── edge_cases.rs  # Edge case tests
└── examples/
    ├── basic_usage.rs # Basic usage example
    ├── prefix_ops.rs  # Prefix operations example
    └── performance.rs # Performance benchmark
```

## 7. Performance Considerations

### 7.1 Advantages of TST over RadixTree

1. **Balanced Structure**: TST naturally maintains a more balanced structure, which can lead to more consistent performance across operations.
2. **Character-by-Character Comparison**: TST performs character-by-character comparisons, which can be more efficient for certain workloads.
3. **Efficient Prefix Operations**: TST can efficiently handle prefix operations by traversing the middle child path.

### 7.2 Potential Optimizations

1. **Node Caching**: Cache frequently accessed nodes to reduce database operations.
2. **Balancing Techniques**: Implement balancing techniques to ensure the tree remains balanced.
3. **Batch Operations**: Support batch operations for improved performance.
4. **Memory Management**: Implement efficient memory usage patterns to avoid excessive memory consumption.
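A minimal sketch of the node-caching idea from point 1: a bounded map in front of `get_node`, keyed by record ID. The `NodeCache` name, capacity, and crude eviction policy are all assumptions of the sketch, not part of the planned API.

```rust
use std::collections::HashMap;

/// Hypothetical read-through cache for deserialized nodes, keyed by record ID.
/// The `load` closure stands in for the OurDB fetch + deserialize step.
struct NodeCache<N> {
    capacity: usize,
    entries: HashMap<u32, N>,
}

impl<N: Clone> NodeCache<N> {
    fn new(capacity: usize) -> Self {
        Self { capacity, entries: HashMap::new() }
    }

    fn get_or_load<E>(&mut self, id: u32, load: impl FnOnce(u32) -> Result<N, E>) -> Result<N, E> {
        if let Some(node) = self.entries.get(&id) {
            return Ok(node.clone()); // cache hit: no database access
        }
        let node = load(id)?;
        if self.entries.len() >= self.capacity {
            // Crude eviction for the sketch; a real cache would use LRU or similar.
            self.entries.clear();
        }
        self.entries.insert(id, node.clone());
        Ok(node)
    }

    /// Writes must invalidate (or refresh) the cached copy.
    fn invalidate(&mut self, id: u32) {
        self.entries.remove(&id);
    }
}
```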
## 8. Testing Strategy

### 8.1 Unit Tests

1. Test `TSTNode` serialization/deserialization
2. Test character comparison operations
3. Test error handling

### 8.2 Integration Tests

1. Test basic CRUD operations
2. Test prefix operations
3. Test edge cases (empty keys, very long keys, etc.)
4. Test with large datasets

### 8.3 Performance Tests

1. Measure throughput for set/get operations
2. Measure latency for different operations
3. Test with different tree sizes and key distributions
4. Compare performance with RadixTree

#### 8.3.1 Performance Benchmarking

We'll create comprehensive benchmarks to compare the TST implementation with RadixTree:

```rust
// Example benchmark structure
fn benchmark_set_operations(tree_type: &str, num_records: usize) -> Duration {
    let start_time = Instant::now();

    // Create tree (TST or RadixTree)
    let mut tree = match tree_type {
        "tst" => create_tst(),
        "radix" => create_radix_tree(),
        _ => panic!("Unknown tree type"),
    };

    // Insert records
    for i in 0..num_records {
        let key = format!("key:{:08}", i);
        let value = format!("val{}", i).into_bytes();
        tree.set(&key, value).unwrap();
    }

    start_time.elapsed()
}
```

We'll benchmark the following operations:
- Set (insertion)
- Get (lookup)
- Delete
- List (prefix search)
- GetAll (prefix values)

For each operation, we'll measure:
- Throughput (operations per second)
- Latency (time per operation)
- Memory usage
- Database size

We'll test with various dataset characteristics:
- Small datasets (100-1,000 keys)
- Medium datasets (10,000-100,000 keys)
- Large datasets (1,000,000+ keys)
- Keys with common prefixes
- Keys with random distribution
- Long keys vs. short keys
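To make these dataset shapes reproducible across both tree implementations, the benchmarks can share small key generators. The functions below are illustrative sketches; the exact prefixes and distributions are not fixed by this plan.

```rust
/// Every key shares a long common prefix, stressing middle-child chains.
fn common_prefix_key(i: usize) -> String {
    format!("user:profile:{:08}", i)
}

/// Cheap deterministic scrambling so keys arrive in non-sorted order
/// without pulling in a random-number crate.
fn pseudo_random_key(i: usize) -> String {
    let scrambled = (i as u64).wrapping_mul(2654435761) % 100_000_000;
    format!("k{:08}", scrambled)
}

/// Long keys exercise deep middle-child paths and larger serialized nodes.
fn long_key(i: usize) -> String {
    format!("{}:{:08}", "segment/".repeat(8), i)
}
```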
## 9. Timeline and Milestones

1. **Week 1**: Core data structures and serialization
2. **Week 2**: Basic tree operations
3. **Week 3**: Advanced tree operations
4. **Week 4**: Testing and performance evaluation
5. **Week 5**: Optimization and documentation

## 10. Conclusion

This implementation plan provides a roadmap for creating a Ternary Search Tree (TST) as an alternative to the RadixTree implementation. The TST will maintain the same core functionality while providing a more balanced tree structure and aiming for balanced performance across all operations.

The implementation will leverage OurDB for persistent storage, similar to RadixTree, but with a different node structure and traversal algorithm that better suits the TST approach.