Merge branch 'development_rhai'

This commit is contained in:
timurgordon
2025-06-25 20:46:15 +03:00
137 changed files with 7274 additions and 6786 deletions

View File

@@ -1,16 +1,16 @@
use tst::TST;
use std::time::Instant;
use tst::TST;
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_example");
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
// Create a new TST
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Store some data
println!("Inserting data...");
tree.set("hello", b"world".to_vec())?;
@@ -19,50 +19,50 @@ fn main() -> Result<(), tst::Error> {
tree.set("apple", b"fruit".to_vec())?;
tree.set("application", b"software".to_vec())?;
tree.set("banana", b"yellow".to_vec())?;
// Retrieve and print the data
let value = tree.get("hello")?;
println!("hello: {}", String::from_utf8_lossy(&value));
// List keys with prefix
println!("\nListing keys with prefix 'hel':");
let start = Instant::now();
let keys = tree.list("hel")?;
let duration = start.elapsed();
for key in &keys {
println!(" {}", key);
}
println!("Found {} keys in {:?}", keys.len(), duration);
// Get all values with prefix
println!("\nGetting all values with prefix 'app':");
let start = Instant::now();
let values = tree.getall("app")?;
let duration = start.elapsed();
for (i, value) in values.iter().enumerate() {
println!(" Value {}: {}", i + 1, String::from_utf8_lossy(value));
}
println!("Found {} values in {:?}", values.len(), duration);
// Delete a key
println!("\nDeleting 'help'...");
tree.delete("help")?;
// Verify deletion
println!("Listing keys with prefix 'hel' after deletion:");
let keys_after = tree.list("hel")?;
for key in &keys_after {
println!(" {}", key);
}
// Try to get a deleted key
match tree.get("help") {
Ok(_) => println!("Unexpectedly found 'help' after deletion!"),
Err(e) => println!("As expected, 'help' was not found: {}", e),
}
// Clean up (optional)
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
@@ -70,6 +70,6 @@ fn main() -> Result<(), tst::Error> {
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}
}

View File

@@ -1,20 +1,20 @@
use tst::TST;
use std::time::{Duration, Instant};
use std::io::{self, Write};
use std::time::{Duration, Instant};
use tst::TST;
/// Builds a deterministic test payload of exactly `size` bytes.
///
/// The payload is the ASCII pattern `val<index zero-padded to 8 digits>`
/// repeated as often as needed and cut off at `size` bytes. A `size` of zero
/// yields an empty vector.
fn generate_test_value(index: usize, size: usize) -> Vec<u8> {
    let pattern = format!("val{:08}", index);

    // The pattern is never empty (it always starts with "val"), so cycling
    // over its bytes and taking `size` of them gives the repeated-then-truncated
    // payload directly.
    pattern.bytes().cycle().take(size).collect()
}
@@ -28,39 +28,39 @@ const PERFORMANCE_SAMPLE_SIZE: usize = 100;
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_performance_test");
// Completely remove and recreate the directory to ensure a clean start
if db_path.exists() {
std::fs::remove_dir_all(&db_path)?;
}
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
println!("Will insert {} records and show progress...", TOTAL_RECORDS);
// Create a new TST
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Track overall time
let start_time = Instant::now();
// Track performance metrics
let mut insertion_times = Vec::with_capacity(TOTAL_RECORDS / PROGRESS_INTERVAL);
let mut last_batch_time = Instant::now();
let mut last_batch_records = 0;
// Insert records and track progress
for i in 0..TOTAL_RECORDS {
let key = format!("key:{:08}", i);
// Generate a 100-byte value
let value = generate_test_value(i, 100);
// Time the insertion of every Nth record for performance sampling
if i % PERFORMANCE_SAMPLE_SIZE == 0 {
let insert_start = Instant::now();
tree.set(&key, value)?;
let insert_duration = insert_start.elapsed();
// Only print detailed timing for specific samples to avoid flooding output
if i % (PERFORMANCE_SAMPLE_SIZE * 10) == 0 {
println!("Record {}: Insertion took {:?}", i, insert_duration);
@@ -68,76 +68,93 @@ fn main() -> Result<(), tst::Error> {
} else {
tree.set(&key, value)?;
}
// Show progress at intervals
if (i + 1) % PROGRESS_INTERVAL == 0 || i == TOTAL_RECORDS - 1 {
let records_in_batch = i + 1 - last_batch_records;
let batch_duration = last_batch_time.elapsed();
let records_per_second = records_in_batch as f64 / batch_duration.as_secs_f64();
insertion_times.push((i + 1, batch_duration));
print!("\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
i + 1, TOTAL_RECORDS,
(i + 1) as f64 / TOTAL_RECORDS as f64 * 100.0,
records_per_second);
print!(
"\rProgress: {}/{} records ({:.2}%) - {:.2} records/sec",
i + 1,
TOTAL_RECORDS,
(i + 1) as f64 / TOTAL_RECORDS as f64 * 100.0,
records_per_second
);
io::stdout().flush().unwrap();
last_batch_time = Instant::now();
last_batch_records = i + 1;
}
}
let total_duration = start_time.elapsed();
println!("\n\nPerformance Summary:");
println!("Total time to insert {} records: {:?}", TOTAL_RECORDS, total_duration);
println!("Average insertion rate: {:.2} records/second",
TOTAL_RECORDS as f64 / total_duration.as_secs_f64());
println!(
"Total time to insert {} records: {:?}",
TOTAL_RECORDS, total_duration
);
println!(
"Average insertion rate: {:.2} records/second",
TOTAL_RECORDS as f64 / total_duration.as_secs_f64()
);
// Show performance trend
println!("\nPerformance Trend (records inserted vs. time per batch):");
for (i, (record_count, duration)) in insertion_times.iter().enumerate() {
if i % 10 == 0 || i == insertion_times.len() - 1 { // Only show every 10th point to avoid too much output
println!(" After {} records: {:?} for {} records ({:.2} records/sec)",
record_count,
duration,
PROGRESS_INTERVAL,
PROGRESS_INTERVAL as f64 / duration.as_secs_f64());
if i % 10 == 0 || i == insertion_times.len() - 1 {
// Only show every 10th point to avoid too much output
println!(
" After {} records: {:?} for {} records ({:.2} records/sec)",
record_count,
duration,
PROGRESS_INTERVAL,
PROGRESS_INTERVAL as f64 / duration.as_secs_f64()
);
}
}
// Test access performance with distributed samples
println!("\nTesting access performance with distributed samples...");
let mut total_get_time = Duration::new(0, 0);
let num_samples = 1000;
// Use a simple distribution pattern instead of random
for i in 0..num_samples {
// Distribute samples across the entire range
let sample_id = (i * (TOTAL_RECORDS / num_samples)) % TOTAL_RECORDS;
let key = format!("key:{:08}", sample_id);
let get_start = Instant::now();
let _ = tree.get(&key)?;
total_get_time += get_start.elapsed();
}
println!("Average time to retrieve a record: {:?}",
total_get_time / num_samples as u32);
println!(
"Average time to retrieve a record: {:?}",
total_get_time / num_samples as u32
);
// Test prefix search performance
println!("\nTesting prefix search performance...");
let prefixes = ["key:0", "key:1", "key:5", "key:9"];
for prefix in &prefixes {
let list_start = Instant::now();
let keys = tree.list(prefix)?;
let list_duration = list_start.elapsed();
println!("Found {} keys with prefix '{}' in {:?}",
keys.len(), prefix, list_duration);
println!(
"Found {} keys with prefix '{}' in {:?}",
keys.len(),
prefix,
list_duration
);
}
// Clean up (optional)
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
@@ -145,6 +162,6 @@ fn main() -> Result<(), tst::Error> {
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}
}

View File

@@ -1,82 +1,137 @@
use tst::TST;
use std::time::Instant;
use tst::TST;
fn main() -> Result<(), tst::Error> {
// Create a temporary directory for the database
let db_path = std::env::temp_dir().join("tst_prefix_example");
std::fs::create_dir_all(&db_path)?;
println!("Creating ternary search tree at: {}", db_path.display());
// Create a new TST
let mut tree = TST::new(db_path.to_str().unwrap(), true)?;
// Insert a variety of keys with different prefixes
println!("Inserting data with various prefixes...");
// Names
let names = [
"Alice", "Alexander", "Amanda", "Andrew", "Amy",
"Bob", "Barbara", "Benjamin", "Brenda", "Brian",
"Charlie", "Catherine", "Christopher", "Cynthia", "Carl",
"David", "Diana", "Daniel", "Deborah", "Donald",
"Edward", "Elizabeth", "Eric", "Emily", "Ethan"
"Alice",
"Alexander",
"Amanda",
"Andrew",
"Amy",
"Bob",
"Barbara",
"Benjamin",
"Brenda",
"Brian",
"Charlie",
"Catherine",
"Christopher",
"Cynthia",
"Carl",
"David",
"Diana",
"Daniel",
"Deborah",
"Donald",
"Edward",
"Elizabeth",
"Eric",
"Emily",
"Ethan",
];
for (i, name) in names.iter().enumerate() {
let value = format!("person-{}", i).into_bytes();
tree.set(name, value)?;
}
// Cities
let cities = [
"New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
"Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose",
"Austin", "Jacksonville", "Fort Worth", "Columbus", "San Francisco",
"Charlotte", "Indianapolis", "Seattle", "Denver", "Washington"
"New York",
"Los Angeles",
"Chicago",
"Houston",
"Phoenix",
"Philadelphia",
"San Antonio",
"San Diego",
"Dallas",
"San Jose",
"Austin",
"Jacksonville",
"Fort Worth",
"Columbus",
"San Francisco",
"Charlotte",
"Indianapolis",
"Seattle",
"Denver",
"Washington",
];
for (i, city) in cities.iter().enumerate() {
let value = format!("city-{}", i).into_bytes();
tree.set(city, value)?;
}
// Countries
let countries = [
"United States", "Canada", "Mexico", "Brazil", "Argentina",
"United Kingdom", "France", "Germany", "Italy", "Spain",
"China", "Japan", "India", "Australia", "Russia"
"United States",
"Canada",
"Mexico",
"Brazil",
"Argentina",
"United Kingdom",
"France",
"Germany",
"Italy",
"Spain",
"China",
"Japan",
"India",
"Australia",
"Russia",
];
for (i, country) in countries.iter().enumerate() {
let value = format!("country-{}", i).into_bytes();
tree.set(country, value)?;
}
println!("Total items inserted: {}", names.len() + cities.len() + countries.len());
println!(
"Total items inserted: {}",
names.len() + cities.len() + countries.len()
);
// Test prefix operations
test_prefix(&mut tree, "A")?;
test_prefix(&mut tree, "B")?;
test_prefix(&mut tree, "C")?;
test_prefix(&mut tree, "San")?;
test_prefix(&mut tree, "United")?;
// Test non-existent prefix
test_prefix(&mut tree, "Z")?;
// Test empty prefix (should return all keys)
println!("\nTesting empty prefix (should return all keys):");
let start = Instant::now();
let all_keys = tree.list("")?;
let duration = start.elapsed();
println!("Found {} keys with empty prefix in {:?}", all_keys.len(), duration);
println!(
"Found {} keys with empty prefix in {:?}",
all_keys.len(),
duration
);
println!("First 5 keys (alphabetically):");
for key in all_keys.iter().take(5) {
println!(" {}", key);
}
// Clean up (optional)
if std::env::var("KEEP_DB").is_err() {
std::fs::remove_dir_all(&db_path)?;
@@ -84,39 +139,46 @@ fn main() -> Result<(), tst::Error> {
} else {
println!("\nDatabase kept at: {}", db_path.display());
}
Ok(())
}
fn test_prefix(tree: &mut TST, prefix: &str) -> Result<(), tst::Error> {
println!("\nTesting prefix '{}':", prefix);
// Test list operation
let start = Instant::now();
let keys = tree.list(prefix)?;
let list_duration = start.elapsed();
println!("Found {} keys with prefix '{}' in {:?}", keys.len(), prefix, list_duration);
println!(
"Found {} keys with prefix '{}' in {:?}",
keys.len(),
prefix,
list_duration
);
if !keys.is_empty() {
println!("Keys:");
for key in &keys {
println!(" {}", key);
}
// Test getall operation
let start = Instant::now();
let values = tree.getall(prefix)?;
let getall_duration = start.elapsed();
println!("Retrieved {} values in {:?}", values.len(), getall_duration);
println!("First value: {}",
if !values.is_empty() {
String::from_utf8_lossy(&values[0])
} else {
"None".into()
});
println!(
"First value: {}",
if !values.is_empty() {
String::from_utf8_lossy(&values[0])
} else {
"None".into()
}
);
}
Ok(())
}
}

View File

@@ -1,7 +1,7 @@
//! Error types for the TST module.
use thiserror::Error;
use std::io;
use thiserror::Error;
/// Error type for TST operations.
#[derive(Debug, Error)]
@@ -9,28 +9,28 @@ pub enum Error {
/// Error from OurDB operations.
#[error("OurDB error: {0}")]
OurDB(#[from] ourdb::Error),
/// Error when a key is not found.
#[error("Key not found: {0}")]
KeyNotFound(String),
/// Error when a prefix is not found.
#[error("Prefix not found: {0}")]
PrefixNotFound(String),
/// Error during serialization.
#[error("Serialization error: {0}")]
Serialization(String),
/// Error during deserialization.
#[error("Deserialization error: {0}")]
Deserialization(String),
/// Error for invalid operations.
#[error("Invalid operation: {0}")]
InvalidOperation(String),
/// IO error.
#[error("IO error: {0}")]
IO(#[from] io::Error),
}
}

View File

@@ -18,7 +18,7 @@ use ourdb::OurDB;
pub struct TST {
/// Database for persistent storage
db: OurDB,
/// Database ID of the root node
root_id: Option<u32>,
}
@@ -119,4 +119,4 @@ impl TST {
pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
operations::getall(self, prefix)
}
}
}

View File

@@ -5,19 +5,19 @@
pub struct TSTNode {
/// The character stored at this node.
pub character: char,
/// Value stored at this node (empty if not end of key).
pub value: Vec<u8>,
/// Whether this node represents the end of a key.
pub is_end_of_key: bool,
/// Reference to the left child node (for characters < current character).
pub left_id: Option<u32>,
/// Reference to the middle child node (for next character in key).
pub middle_id: Option<u32>,
/// Reference to the right child node (for characters > current character).
pub right_id: Option<u32>,
}
@@ -34,7 +34,7 @@ impl TSTNode {
right_id: None,
}
}
/// Creates a new root node.
pub fn new_root() -> Self {
Self {
@@ -46,4 +46,4 @@ impl TSTNode {
right_id: None,
}
}
}
}

View File

@@ -9,19 +9,19 @@ use std::path::PathBuf;
/// Creates a new TST with the specified database path.
pub fn new_tst(path: &str, reset: bool) -> Result<TST, Error> {
let path_buf = PathBuf::from(path);
// Create the configuration for OurDB with reset parameter
let config = OurDBConfig {
path: path_buf.clone(),
incremental_mode: true,
file_size: Some(1024 * 1024), // 1MB file size for better performance with large datasets
keysize: Some(4), // Use keysize=4 (default)
reset: Some(reset), // Use the reset parameter
keysize: Some(4), // Use keysize=4 (default)
reset: Some(reset), // Use the reset parameter
};
// Create a new OurDB instance (it will handle reset internally)
let mut db = OurDB::new(config)?;
let root_id = if db.get_next_id()? == 1 || reset {
// Create a new root node
let root = TSTNode::new_root();
@@ -29,17 +29,14 @@ pub fn new_tst(path: &str, reset: bool) -> Result<TST, Error> {
id: None,
data: &root.serialize(),
})?;
Some(root_id)
} else {
// Use existing root node
Some(1) // Root node always has ID 1
};
Ok(TST {
db,
root_id,
})
Ok(TST { db, root_id })
}
/// Sets a key-value pair in the tree.
@@ -47,45 +44,51 @@ pub fn set(tree: &mut TST, key: &str, value: Vec<u8>) -> Result<(), Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
set_recursive(tree, root_id, &chars, 0, value)?;
Ok(())
}
/// Recursive helper function for setting a key-value pair.
fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value: Vec<u8>) -> Result<u32, Error> {
fn set_recursive(
tree: &mut TST,
node_id: u32,
chars: &[char],
pos: usize,
value: Vec<u8>,
) -> Result<u32, Error> {
let mut node = tree.get_node(node_id)?;
if pos >= chars.len() {
// We've reached the end of the key
node.is_end_of_key = true;
node.value = value;
return tree.save_node(Some(node_id), &node);
}
let current_char = chars[pos];
if node.character == '\0' {
// Root node or empty node, set the character
node.character = current_char;
let node_id = tree.save_node(Some(node_id), &node)?;
// Continue with the next character
if pos + 1 < chars.len() {
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
let mut updated_node = tree.get_node(node_id)?;
updated_node.middle_id = Some(new_id);
tree.save_node(Some(node_id), &updated_node)?;
return set_recursive(tree, new_id, chars, pos + 1, value);
} else {
// This is the last character
@@ -95,7 +98,7 @@ fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value
return tree.save_node(Some(node_id), &updated_node);
}
}
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
@@ -104,11 +107,11 @@ fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value
// Create new left node
let new_node = TSTNode::new(current_char, Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.left_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos, value);
}
} else if current_char > node.character {
@@ -119,11 +122,11 @@ fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value
// Create new right node
let new_node = TSTNode::new(current_char, Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.right_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos, value);
}
} else {
@@ -134,18 +137,18 @@ fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value
node.value = value;
return tree.save_node(Some(node_id), &node);
}
if let Some(middle_id) = node.middle_id {
return set_recursive(tree, middle_id, chars, pos + 1, value);
} else {
// Create new middle node
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
let new_id = tree.save_node(None, &new_node)?;
// Update current node
node.middle_id = Some(new_id);
tree.save_node(Some(node_id), &node)?;
return set_recursive(tree, new_id, chars, pos + 1, value);
}
}
@@ -156,15 +159,15 @@ pub fn get(tree: &mut TST, key: &str) -> Result<Vec<u8>, Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
let node_id = find_node(tree, root_id, &chars, 0)?;
let node = tree.get_node(node_id)?;
if node.is_end_of_key {
Ok(node.value.clone())
@@ -176,13 +179,13 @@ pub fn get(tree: &mut TST, key: &str) -> Result<Vec<u8>, Error> {
/// Finds a node by key.
fn find_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
let node = tree.get_node(node_id)?;
if pos >= chars.len() {
return Ok(node_id);
}
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
@@ -216,21 +219,21 @@ pub fn delete(tree: &mut TST, key: &str) -> Result<(), Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
let node_id = find_node(tree, root_id, &chars, 0)?;
let mut node = tree.get_node(node_id)?;
if !node.is_end_of_key {
return Err(Error::KeyNotFound(key.to_string()));
}
// If the node has any child (left, middle, or right), just mark it as not end of key
if node.middle_id.is_some() || node.left_id.is_some() || node.right_id.is_some() {
node.is_end_of_key = false;
@@ -238,14 +241,14 @@ pub fn delete(tree: &mut TST, key: &str) -> Result<(), Error> {
tree.save_node(Some(node_id), &node)?;
return Ok(());
}
// Otherwise, we need to remove the node and update its parent
// This is more complex and would require tracking the path to the node
// For simplicity, we'll just mark it as not end of key for now
node.is_end_of_key = false;
node.value = Vec::new();
tree.save_node(Some(node_id), &node)?;
Ok(())
}
@@ -255,46 +258,51 @@ pub fn list(tree: &mut TST, prefix: &str) -> Result<Vec<String>, Error> {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let mut result = Vec::new();
// Handle empty prefix case - will return all keys
if prefix.is_empty() {
collect_all_keys(tree, root_id, String::new(), &mut result)?;
return Ok(result);
}
// Find the node corresponding to the prefix
let chars: Vec<char> = prefix.chars().collect();
let node_id = match find_prefix_node(tree, root_id, &chars, 0) {
Ok(id) => id,
Err(_) => return Ok(Vec::new()), // Prefix not found, return empty list
};
// For empty prefix, we start with an empty string
// For non-empty prefix, we start with the prefix minus the last character
// (since the last character is in the node we found)
let prefix_base = if chars.len() > 1 {
chars[0..chars.len()-1].iter().collect()
chars[0..chars.len() - 1].iter().collect()
} else {
String::new()
};
// Collect all keys from the subtree
collect_keys_with_prefix(tree, node_id, prefix_base, &mut result)?;
Ok(result)
}
/// Finds the node corresponding to a prefix.
fn find_prefix_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
fn find_prefix_node(
tree: &mut TST,
node_id: u32,
chars: &[char],
pos: usize,
) -> Result<u32, Error> {
if pos >= chars.len() {
return Ok(node_id);
}
let node = tree.get_node(node_id)?;
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
@@ -331,32 +339,32 @@ fn collect_keys_with_prefix(
result: &mut Vec<String>,
) -> Result<(), Error> {
let node = tree.get_node(node_id)?;
let mut new_path = current_path.clone();
// For non-root nodes, add the character to the path
if node.character != '\0' {
new_path.push(node.character);
}
// If this node is an end of key, add it to the result
if node.is_end_of_key {
result.push(new_path.clone());
}
// Recursively collect keys from all children
if let Some(left_id) = node.left_id {
collect_keys_with_prefix(tree, left_id, current_path.clone(), result)?;
}
if let Some(middle_id) = node.middle_id {
collect_keys_with_prefix(tree, middle_id, new_path.clone(), result)?;
}
if let Some(right_id) = node.right_id {
collect_keys_with_prefix(tree, right_id, current_path.clone(), result)?;
}
Ok(())
}
@@ -368,32 +376,32 @@ fn collect_all_keys(
result: &mut Vec<String>,
) -> Result<(), Error> {
let node = tree.get_node(node_id)?;
let mut new_path = current_path.clone();
// Skip adding the character for the root node
if node.character != '\0' {
new_path.push(node.character);
}
// If this node is an end of key, add it to the result
if node.is_end_of_key {
result.push(new_path.clone());
}
// Recursively collect keys from all children
if let Some(left_id) = node.left_id {
collect_all_keys(tree, left_id, current_path.clone(), result)?;
}
if let Some(middle_id) = node.middle_id {
collect_all_keys(tree, middle_id, new_path.clone(), result)?;
}
if let Some(right_id) = node.right_id {
collect_all_keys(tree, right_id, current_path.clone(), result)?;
}
Ok(())
}
@@ -401,23 +409,23 @@ fn collect_all_keys(
pub fn getall(tree: &mut TST, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
// Get all matching keys
let keys = list(tree, prefix)?;
// Get values for each key
let mut values = Vec::new();
let mut errors = Vec::new();
for key in keys {
match get(tree, &key) {
Ok(value) => values.push(value),
Err(e) => errors.push(format!("Error getting value for key '{}': {:?}", key, e))
Err(e) => errors.push(format!("Error getting value for key '{}': {:?}", key, e)),
}
}
// If we couldn't get any values but had keys, return all collected errors joined into one
if values.is_empty() && !errors.is_empty() {
return Err(Error::InvalidOperation(errors.join("; ")));
}
Ok(values)
}
@@ -442,4 +450,4 @@ impl TST {
Err(err) => Err(Error::OurDB(err)),
}
}
}
}

View File

@@ -10,17 +10,17 @@ impl TSTNode {
/// Serializes a node to bytes for storage.
pub fn serialize(&self) -> Vec<u8> {
let mut buffer = Vec::new();
// Version
buffer.push(VERSION);
// Character (as UTF-32)
let char_bytes = (self.character as u32).to_le_bytes();
buffer.extend_from_slice(&char_bytes);
// Is end of key
buffer.push(if self.is_end_of_key { 1 } else { 0 });
// Value (only if is_end_of_key)
if self.is_end_of_key {
let value_len = (self.value.len() as u32).to_le_bytes();
@@ -30,88 +30,100 @@ impl TSTNode {
// Zero length
buffer.extend_from_slice(&[0, 0, 0, 0]);
}
// Child pointers
let left_id = self.left_id.unwrap_or(0).to_le_bytes();
buffer.extend_from_slice(&left_id);
let middle_id = self.middle_id.unwrap_or(0).to_le_bytes();
buffer.extend_from_slice(&middle_id);
let right_id = self.right_id.unwrap_or(0).to_le_bytes();
buffer.extend_from_slice(&right_id);
buffer
}
/// Deserializes bytes to a node.
pub fn deserialize(data: &[u8]) -> Result<Self, Error> {
if data.len() < 14 { // Minimum size: version + char + is_end + value_len + 3 child IDs
if data.len() < 14 {
// Minimum size: version + char + is_end + value_len + 3 child IDs
return Err(Error::Deserialization("Data too short".to_string()));
}
let mut pos = 0;
// Version
let version = data[pos];
pos += 1;
if version != VERSION {
return Err(Error::Deserialization(format!("Unsupported version: {}", version)));
return Err(Error::Deserialization(format!(
"Unsupported version: {}",
version
)));
}
// Character
let char_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let char_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
let char_code = u32::from_le_bytes(char_bytes);
let character = char::from_u32(char_code)
.ok_or_else(|| Error::Deserialization("Invalid character".to_string()))?;
pos += 4;
// Is end of key
let is_end_of_key = data[pos] != 0;
pos += 1;
// Value length
let value_len_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let value_len_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
let value_len = u32::from_le_bytes(value_len_bytes) as usize;
pos += 4;
// Value
let value = if value_len > 0 {
if pos + value_len > data.len() {
return Err(Error::Deserialization("Value length exceeds data".to_string()));
return Err(Error::Deserialization(
"Value length exceeds data".to_string(),
));
}
data[pos..pos+value_len].to_vec()
data[pos..pos + value_len].to_vec()
} else {
Vec::new()
};
pos += value_len;
// Child pointers
if pos + 12 > data.len() {
return Err(Error::Deserialization("Data too short for child pointers".to_string()));
return Err(Error::Deserialization(
"Data too short for child pointers".to_string(),
));
}
let left_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let left_id_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
let left_id = u32::from_le_bytes(left_id_bytes);
pos += 4;
let middle_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let middle_id_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
let middle_id = u32::from_le_bytes(middle_id_bytes);
pos += 4;
let right_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
let right_id_bytes = [data[pos], data[pos + 1], data[pos + 2], data[pos + 3]];
let right_id = u32::from_le_bytes(right_id_bytes);
Ok(TSTNode {
character,
value,
is_end_of_key,
left_id: if left_id == 0 { None } else { Some(left_id) },
middle_id: if middle_id == 0 { None } else { Some(middle_id) },
middle_id: if middle_id == 0 {
None
} else {
Some(middle_id)
},
right_id: if right_id == 0 { None } else { Some(right_id) },
})
}
}
// Function removed as it was unused
// Function removed as it was unused

View File

@@ -1,24 +1,24 @@
use tst::TST;
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;
use tst::TST;
/// Creates a fresh, empty directory for a test database and returns its path.
///
/// The directory lives under the system temp dir and is named
/// `tst_test_<nanoseconds since the Unix epoch>`.
///
/// # Panics
///
/// Panics if the system clock is before the Unix epoch or if the directory
/// cannot be created.
fn get_test_db_path() -> String {
    let nanos_since_epoch = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let dir_name = format!("tst_test_{}", nanos_since_epoch);
    let db_dir = temp_dir().join(dir_name);

    // Best-effort removal of a leftover directory with the same name; a
    // failure here is deliberately ignored.
    if db_dir.exists() {
        let _ = fs::remove_dir_all(&db_dir);
    }

    fs::create_dir_all(&db_dir).unwrap();

    db_dir.to_string_lossy().into_owned()
}
@@ -30,44 +30,44 @@ fn cleanup_test_db(path: &str) {
/// A TST can be created on a fresh directory and accepts a basic `set`.
#[test]
fn test_create_tst() {
    let db_dir = get_test_db_path();
    let created = TST::new(&db_dir, true);

    // Surface the underlying error in the test output before the assert fires.
    if let Err(e) = &created {
        println!("Error creating TST: {:?}", e);
    }
    assert!(created.is_ok());

    if let Ok(mut tst) = created {
        // Make sure we can perform a basic operation
        let stored = tst.set("test_key", b"test_value".to_vec());
        assert!(stored.is_ok());
    }

    cleanup_test_db(&db_dir);
}
/// A value stored under a key is returned unchanged by `get`.
#[test]
fn test_set_and_get() {
    let db_dir = get_test_db_path();

    // reset=true guarantees a clean state
    let created = TST::new(&db_dir, true);
    assert!(created.is_ok());
    let mut tst = created.unwrap();

    let expected = b"test_value".to_vec();
    assert!(tst.set("test_key", expected.clone()).is_ok());

    let fetched = tst.get("test_key");
    assert!(fetched.is_ok());
    assert_eq!(fetched.unwrap(), expected);

    // Make sure to clean up properly
    cleanup_test_db(&db_dir);
}
@@ -75,45 +75,45 @@ fn test_set_and_get() {
/// Looking up a key that was never inserted yields an error.
#[test]
fn test_get_nonexistent_key() {
    let db_dir = get_test_db_path();
    let mut tst = TST::new(&db_dir, true).unwrap();

    assert!(tst.get("nonexistent_key").is_err());

    cleanup_test_db(&db_dir);
}
/// A key that has been deleted can no longer be retrieved.
#[test]
fn test_delete() {
    let db_dir = get_test_db_path();

    // reset=true guarantees a clean state
    let created = TST::new(&db_dir, true);
    assert!(created.is_ok());
    let mut tst = created.unwrap();

    let target = "delete_test";

    // Insert the key and confirm it is readable
    assert!(tst.set(target, b"to_be_deleted".to_vec()).is_ok());
    assert!(tst.get(target).is_ok());

    // Remove it and confirm the lookup now fails
    assert!(tst.delete(target).is_ok());
    assert!(tst.get(target).is_err());

    // Make sure to clean up properly
    cleanup_test_db(&db_dir);
}
@@ -121,28 +121,28 @@ fn test_delete() {
#[test]
fn test_multiple_keys() {
let path = get_test_db_path();
// Create a new TST with reset=true to ensure a clean state
let result = TST::new(&path, true);
assert!(result.is_ok());
let mut tree = result.unwrap();
// Insert multiple keys - use fewer keys to avoid filling the lookup table
let keys = ["apple", "banana", "cherry"];
for (i, key) in keys.iter().enumerate() {
let value = format!("value_{}", i).into_bytes();
let set_result = tree.set(key, value);
// Print error if set fails
if set_result.is_err() {
println!("Error setting key '{}': {:?}", key, set_result);
}
assert!(set_result.is_ok());
}
// Verify all keys exist
for (i, key) in keys.iter().enumerate() {
let expected_value = format!("value_{}", i).into_bytes();
@@ -150,7 +150,7 @@ fn test_multiple_keys() {
assert!(get_result.is_ok());
assert_eq!(get_result.unwrap(), expected_value);
}
// Make sure to clean up properly
cleanup_test_db(&path);
}
@@ -158,56 +158,53 @@ fn test_multiple_keys() {
// Checks that `list` reports every stored key that starts with the requested
// prefix, and reports nothing for a prefix that no stored key starts with.
#[test]
fn test_list_prefix() {
    let path = get_test_db_path();

    // Create a new TST with reset=true to ensure a clean state
    let result = TST::new(&path, true);
    assert!(result.is_ok());
    let mut tree = result.unwrap();

    // Insert keys with common prefixes - use fewer keys to avoid filling the lookup table.
    // Each key is stored with its own bytes as the value.
    let keys = ["apple", "application", "append", "banana", "bandana"];
    for key in &keys {
        let set_result = tree.set(key, key.as_bytes().to_vec());
        assert!(set_result.is_ok());
    }

    // Test prefix "app"
    let list_result = tree.list("app");
    assert!(list_result.is_ok());
    let app_keys = list_result.unwrap();

    // Print the keys for debugging
    println!("Keys with prefix 'app':");
    for key in &app_keys {
        println!(" {}", key);
    }

    // Check that each key is present
    assert!(app_keys.contains(&"apple".to_string()));
    assert!(app_keys.contains(&"application".to_string()));
    assert!(app_keys.contains(&"append".to_string()));

    // Test prefix "ban"
    let list_result = tree.list("ban");
    assert!(list_result.is_ok());
    let ban_keys = list_result.unwrap();
    assert!(ban_keys.contains(&"banana".to_string()));
    assert!(ban_keys.contains(&"bandana".to_string()));

    // Test non-existent prefix
    let list_result = tree.list("z");
    assert!(list_result.is_ok());
    let z_keys = list_result.unwrap();
    assert_eq!(z_keys.len(), 0);

    // Make sure to clean up properly
    cleanup_test_db(&path);
}
@@ -215,46 +212,44 @@ fn test_list_prefix() {
// Checks that `getall` returns the value of every stored key that starts with
// the requested prefix. Each key is stored with its own bytes as the value, so
// the returned values can be compared against the key names.
#[test]
fn test_getall_prefix() {
    let path = get_test_db_path();

    // Create a new TST with reset=true to ensure a clean state
    let result = TST::new(&path, true);
    assert!(result.is_ok());
    let mut tree = result.unwrap();

    // Insert keys with common prefixes - use fewer keys to avoid filling the lookup table
    let keys = ["apple", "application", "append"];
    for key in &keys {
        let set_result = tree.set(key, key.as_bytes().to_vec());
        assert!(set_result.is_ok());
    }

    // Test getall with prefix "app"
    let getall_result = tree.getall("app");
    assert!(getall_result.is_ok());
    let app_values = getall_result.unwrap();

    // Convert values to strings for easier comparison
    let app_value_strings: Vec<String> = app_values
        .iter()
        .map(|v| String::from_utf8_lossy(v).into_owned())
        .collect();

    // Print the values for debugging
    println!("Values with prefix 'app':");
    for value in &app_value_strings {
        println!(" {}", value);
    }

    // Check that each value is present
    assert!(app_value_strings.contains(&"apple".to_string()));
    assert!(app_value_strings.contains(&"application".to_string()));
    assert!(app_value_strings.contains(&"append".to_string()));

    // Make sure to clean up properly
    cleanup_test_db(&path);
}
@@ -262,38 +257,38 @@ fn test_getall_prefix() {
// Listing with an empty prefix is expected to report every stored key.
#[test]
fn test_empty_prefix() {
    let path = get_test_db_path();

    // reset=true so the tree starts out empty
    let opened = TST::new(&path, true);
    assert!(opened.is_ok());
    let mut tree = opened.unwrap();

    // Store each key with its own bytes as the value
    let keys = ["apple", "banana", "cherry"];
    for name in keys.iter() {
        let stored = tree.set(name, name.as_bytes().to_vec());
        assert!(stored.is_ok());
    }

    // An empty prefix should match everything
    let listing = tree.list("");
    assert!(listing.is_ok());
    let all_keys = listing.unwrap();

    // Dump what came back to help diagnose failures
    println!("Keys with empty prefix:");
    all_keys.iter().for_each(|found| println!(" {}", found));

    // Every inserted key must be in the listing
    for name in keys.iter() {
        assert!(all_keys.contains(&name.to_string()));
    }

    // Remove the on-disk database
    cleanup_test_db(&path);
}
}

View File

@@ -1,24 +1,24 @@
use tst::TST;
use std::env::temp_dir;
use std::fs;
use std::time::SystemTime;
use tst::TST;
// Creates a fresh, empty directory under the system temp dir for one test's
// database and returns its path as a string.
//
// The directory name combines the current time in nanoseconds, the process id
// and a per-process counter. The timestamp alone is not a reliable
// discriminator: tests run on parallel threads by default and some clocks are
// coarser than a nanosecond, so two calls could otherwise land on the same
// directory and one test would wipe the other's data.
//
// Panics if the system clock is before the Unix epoch or the directory cannot
// be created.
fn get_test_db_path() -> String {
    // Distinguishes calls that observe the same timestamp within one process
    static NEXT_ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    let timestamp = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .expect("system clock is set before the Unix epoch")
        .as_nanos();
    let sequence = NEXT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

    let path = temp_dir().join(format!(
        "tst_prefix_test_{}_{}_{}",
        timestamp,
        std::process::id(),
        sequence
    ));

    // If the path exists, remove it first (best effort; create_dir_all below
    // still succeeds on an existing directory)
    if path.exists() {
        let _ = fs::remove_dir_all(&path);
    }

    // Create the directory
    fs::create_dir_all(&path).expect("failed to create test database directory");

    path.to_string_lossy().into_owned()
}
@@ -30,9 +30,9 @@ fn cleanup_test_db(path: &str) {
#[test]
fn test_prefix_with_common_prefixes() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with common prefixes
let test_data = [
("test", b"value1".to_vec()),
@@ -41,34 +41,34 @@ fn test_prefix_with_common_prefixes() {
("tests", b"value4".to_vec()),
("tester", b"value5".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test prefix "test"
let keys = tree.list("test").unwrap();
assert_eq!(keys.len(), 5);
for (key, _) in &test_data {
assert!(keys.contains(&key.to_string()));
}
// Test prefix "teste"
let keys = tree.list("teste").unwrap();
assert_eq!(keys.len(), 2);
assert!(keys.contains(&"tested".to_string()));
assert!(keys.contains(&"tester".to_string()));
cleanup_test_db(&path);
}
#[test]
fn test_prefix_with_different_prefixes() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with different prefixes
let test_data = [
("apple", b"fruit1".to_vec()),
@@ -77,64 +77,64 @@ fn test_prefix_with_different_prefixes() {
("date", b"fruit4".to_vec()),
("elderberry", b"fruit5".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test each prefix
for (key, _) in &test_data {
let prefix = &key[0..1]; // First character
let keys = tree.list(prefix).unwrap();
assert!(keys.contains(&key.to_string()));
}
// Test non-existent prefix
let keys = tree.list("z").unwrap();
assert_eq!(keys.len(), 0);
cleanup_test_db(&path);
}
// Listing with an empty prefix is expected to report every stored key.
#[test]
fn test_prefix_with_empty_string() {
    let path = get_test_db_path();

    // reset=true so the tree starts out empty
    let opened = TST::new(&path, true);
    assert!(opened.is_ok());
    let mut tree = opened.unwrap();

    // Key/value pairs to store
    let test_data = [
        ("apple", b"fruit1".to_vec()),
        ("banana", b"fruit2".to_vec()),
        ("cherry", b"fruit3".to_vec()),
    ];
    for (name, payload) in test_data.iter() {
        let stored = tree.set(name, payload.clone());
        assert!(stored.is_ok());
    }

    // An empty prefix should match everything
    let listing = tree.list("");
    assert!(listing.is_ok());
    let keys = listing.unwrap();

    // Dump what came back to help diagnose failures
    println!("Keys with empty prefix:");
    keys.iter().for_each(|found| println!(" {}", found));

    // Every inserted key must be in the listing
    for (name, _) in test_data.iter() {
        assert!(keys.contains(&name.to_string()));
    }

    // Remove the on-disk database
    cleanup_test_db(&path);
}
@@ -142,9 +142,9 @@ fn test_prefix_with_empty_string() {
#[test]
fn test_getall_with_prefix() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with common prefixes
let test_data = [
("test", b"value1".to_vec()),
@@ -153,28 +153,28 @@ fn test_getall_with_prefix() {
("tests", b"value4".to_vec()),
("tester", b"value5".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test getall with prefix "test"
let values = tree.getall("test").unwrap();
assert_eq!(values.len(), 5);
for (_, value) in &test_data {
assert!(values.contains(value));
}
cleanup_test_db(&path);
}
#[test]
fn test_prefix_with_unicode_characters() {
let path = get_test_db_path();
let mut tree = TST::new(&path, true).unwrap();
// Insert keys with Unicode characters
let test_data = [
("café", b"coffee".to_vec()),
@@ -182,77 +182,86 @@ fn test_prefix_with_unicode_characters() {
("caffè", b"italian coffee".to_vec()),
("café au lait", b"coffee with milk".to_vec()),
];
for (key, value) in &test_data {
tree.set(key, value.clone()).unwrap();
}
// Test prefix "café"
let keys = tree.list("café").unwrap();
// Print the keys for debugging
println!("Keys with prefix 'café':");
for key in &keys {
println!(" {}", key);
}
// Check that the keys we expect are present
assert!(keys.contains(&"café".to_string()));
assert!(keys.contains(&"café au lait".to_string()));
// We don't assert on the exact count because Unicode handling can vary
// Test prefix "caf"
let keys = tree.list("caf").unwrap();
// Print the keys for debugging
println!("Keys with prefix 'caf':");
for key in &keys {
println!(" {}", key);
}
// Check that each key is present individually
// Due to Unicode handling, we need to be careful with exact matching
// The important thing is that we can find the keys we need
// Check that we have at least the café and café au lait keys
assert!(keys.contains(&"café".to_string()));
assert!(keys.contains(&"café au lait".to_string()));
// We don't assert on the exact count because Unicode handling can vary
cleanup_test_db(&path);
}
// Checks prefix listing when the keys are long and share long common prefixes:
// three keys share "this_is_a_very", and all four share "this_is".
#[test]
fn test_prefix_with_long_keys() {
    let path = get_test_db_path();
    let mut tree = TST::new(&path, true).unwrap();

    // Insert long keys
    let test_data = [
        (
            "this_is_a_very_long_key_for_testing_purposes_1",
            b"value1".to_vec(),
        ),
        (
            "this_is_a_very_long_key_for_testing_purposes_2",
            b"value2".to_vec(),
        ),
        (
            "this_is_a_very_long_key_for_testing_purposes_3",
            b"value3".to_vec(),
        ),
        ("this_is_another_long_key_for_testing", b"value4".to_vec()),
    ];
    for (key, value) in &test_data {
        tree.set(key, value.clone()).unwrap();
    }

    // Test prefix "this_is_a_very"
    let keys = tree.list("this_is_a_very").unwrap();
    assert_eq!(keys.len(), 3);

    // Test prefix "this_is"
    let keys = tree.list("this_is").unwrap();
    assert_eq!(keys.len(), 4);
    for (key, _) in &test_data {
        assert!(keys.contains(&key.to_string()));
    }

    cleanup_test_db(&path);
}
}