418 lines
13 KiB
Rust
418 lines
13 KiB
Rust
//! Implementation of TST operations.
|
|
|
|
use crate::error::Error;
|
|
use crate::node::TSTNode;
|
|
use crate::TST;
|
|
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
|
|
use std::path::PathBuf;
|
|
|
|
/// Creates a new TST with the specified database path.
|
|
pub fn new_tst(path: &str, reset: bool) -> Result<TST, Error> {
|
|
// If the path exists and reset is true, remove it first
|
|
let path_buf = PathBuf::from(path);
|
|
if path_buf.exists() && reset {
|
|
std::fs::remove_dir_all(&path_buf)?;
|
|
}
|
|
|
|
// Create the directory if it doesn't exist
|
|
std::fs::create_dir_all(&path_buf)?;
|
|
|
|
let config = OurDBConfig {
|
|
path: path_buf,
|
|
incremental_mode: true,
|
|
file_size: Some(1024 * 1024), // 10MB file size for better performance with large datasets
|
|
keysize: Some(4), // Use keysize=4 (default)
|
|
};
|
|
|
|
let mut db = OurDB::new(config)?;
|
|
|
|
let root_id = if db.get_next_id()? == 1 || reset {
|
|
// Create a new root node
|
|
let root = TSTNode::new_root();
|
|
let root_id = db.set(OurDBSetArgs {
|
|
id: None,
|
|
data: &root.serialize(),
|
|
})?;
|
|
|
|
Some(root_id)
|
|
} else {
|
|
// Use existing root node
|
|
Some(1) // Root node always has ID 1
|
|
};
|
|
|
|
Ok(TST {
|
|
db,
|
|
root_id,
|
|
})
|
|
}
|
|
|
|
/// Sets a key-value pair in the tree.
|
|
pub fn set(tree: &mut TST, key: &str, value: Vec<u8>) -> Result<(), Error> {
|
|
if key.is_empty() {
|
|
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
|
|
}
|
|
|
|
let root_id = match tree.root_id {
|
|
Some(id) => id,
|
|
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
|
|
};
|
|
|
|
let chars: Vec<char> = key.chars().collect();
|
|
set_recursive(tree, root_id, &chars, 0, value)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Recursive helper function for setting a key-value pair.
|
|
fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value: Vec<u8>) -> Result<u32, Error> {
|
|
let mut node = tree.get_node(node_id)?;
|
|
|
|
if pos >= chars.len() {
|
|
// We've reached the end of the key
|
|
node.is_end_of_key = true;
|
|
node.value = value;
|
|
return tree.save_node(Some(node_id), &node);
|
|
}
|
|
|
|
let current_char = chars[pos];
|
|
|
|
if node.character == '\0' {
|
|
// Root node or empty node, set the character
|
|
node.character = current_char;
|
|
let node_id = tree.save_node(Some(node_id), &node)?;
|
|
|
|
// Continue with the next character
|
|
if pos + 1 < chars.len() {
|
|
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
|
|
let new_id = tree.save_node(None, &new_node)?;
|
|
|
|
let mut updated_node = tree.get_node(node_id)?;
|
|
updated_node.middle_id = Some(new_id);
|
|
tree.save_node(Some(node_id), &updated_node)?;
|
|
|
|
return set_recursive(tree, new_id, chars, pos + 1, value);
|
|
} else {
|
|
// This is the last character
|
|
let mut updated_node = tree.get_node(node_id)?;
|
|
updated_node.is_end_of_key = true;
|
|
updated_node.value = value;
|
|
return tree.save_node(Some(node_id), &updated_node);
|
|
}
|
|
}
|
|
|
|
if current_char < node.character {
|
|
// Go left
|
|
if let Some(left_id) = node.left_id {
|
|
return set_recursive(tree, left_id, chars, pos, value);
|
|
} else {
|
|
// Create new left node
|
|
let new_node = TSTNode::new(current_char, Vec::new(), false);
|
|
let new_id = tree.save_node(None, &new_node)?;
|
|
|
|
// Update current node
|
|
node.left_id = Some(new_id);
|
|
tree.save_node(Some(node_id), &node)?;
|
|
|
|
return set_recursive(tree, new_id, chars, pos, value);
|
|
}
|
|
} else if current_char > node.character {
|
|
// Go right
|
|
if let Some(right_id) = node.right_id {
|
|
return set_recursive(tree, right_id, chars, pos, value);
|
|
} else {
|
|
// Create new right node
|
|
let new_node = TSTNode::new(current_char, Vec::new(), false);
|
|
let new_id = tree.save_node(None, &new_node)?;
|
|
|
|
// Update current node
|
|
node.right_id = Some(new_id);
|
|
tree.save_node(Some(node_id), &node)?;
|
|
|
|
return set_recursive(tree, new_id, chars, pos, value);
|
|
}
|
|
} else {
|
|
// Character matches, go middle (next character)
|
|
if pos + 1 >= chars.len() {
|
|
// This is the last character
|
|
node.is_end_of_key = true;
|
|
node.value = value;
|
|
return tree.save_node(Some(node_id), &node);
|
|
}
|
|
|
|
if let Some(middle_id) = node.middle_id {
|
|
return set_recursive(tree, middle_id, chars, pos + 1, value);
|
|
} else {
|
|
// Create new middle node
|
|
let new_node = TSTNode::new(chars[pos + 1], Vec::new(), false);
|
|
let new_id = tree.save_node(None, &new_node)?;
|
|
|
|
// Update current node
|
|
node.middle_id = Some(new_id);
|
|
tree.save_node(Some(node_id), &node)?;
|
|
|
|
return set_recursive(tree, new_id, chars, pos + 1, value);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Gets a value by key from the tree.
|
|
pub fn get(tree: &mut TST, key: &str) -> Result<Vec<u8>, Error> {
|
|
if key.is_empty() {
|
|
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
|
|
}
|
|
|
|
let root_id = match tree.root_id {
|
|
Some(id) => id,
|
|
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
|
|
};
|
|
|
|
let chars: Vec<char> = key.chars().collect();
|
|
let node_id = find_node(tree, root_id, &chars, 0)?;
|
|
|
|
let node = tree.get_node(node_id)?;
|
|
if node.is_end_of_key {
|
|
Ok(node.value.clone())
|
|
} else {
|
|
Err(Error::KeyNotFound(key.to_string()))
|
|
}
|
|
}
|
|
|
|
/// Finds a node by key.
|
|
fn find_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
|
|
let node = tree.get_node(node_id)?;
|
|
|
|
if pos >= chars.len() {
|
|
return Ok(node_id);
|
|
}
|
|
|
|
let current_char = chars[pos];
|
|
|
|
if current_char < node.character {
|
|
// Go left
|
|
if let Some(left_id) = node.left_id {
|
|
find_node(tree, left_id, chars, pos)
|
|
} else {
|
|
Err(Error::KeyNotFound(chars.iter().collect()))
|
|
}
|
|
} else if current_char > node.character {
|
|
// Go right
|
|
if let Some(right_id) = node.right_id {
|
|
find_node(tree, right_id, chars, pos)
|
|
} else {
|
|
Err(Error::KeyNotFound(chars.iter().collect()))
|
|
}
|
|
} else {
|
|
// Character matches
|
|
if pos + 1 >= chars.len() {
|
|
// This is the last character
|
|
Ok(node_id)
|
|
} else if let Some(middle_id) = node.middle_id {
|
|
// Go to next character
|
|
find_node(tree, middle_id, chars, pos + 1)
|
|
} else {
|
|
Err(Error::KeyNotFound(chars.iter().collect()))
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Deletes a key from the tree.
|
|
pub fn delete(tree: &mut TST, key: &str) -> Result<(), Error> {
|
|
if key.is_empty() {
|
|
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
|
|
}
|
|
|
|
let root_id = match tree.root_id {
|
|
Some(id) => id,
|
|
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
|
|
};
|
|
|
|
let chars: Vec<char> = key.chars().collect();
|
|
let node_id = find_node(tree, root_id, &chars, 0)?;
|
|
|
|
let mut node = tree.get_node(node_id)?;
|
|
|
|
if !node.is_end_of_key {
|
|
return Err(Error::KeyNotFound(key.to_string()));
|
|
}
|
|
|
|
// If the node has a middle child, just mark it as not end of key
|
|
if node.middle_id.is_some() || node.left_id.is_some() || node.right_id.is_some() {
|
|
node.is_end_of_key = false;
|
|
node.value = Vec::new();
|
|
tree.save_node(Some(node_id), &node)?;
|
|
return Ok(());
|
|
}
|
|
|
|
// Otherwise, we need to remove the node and update its parent
|
|
// This is more complex and would require tracking the path to the node
|
|
// For simplicity, we'll just mark it as not end of key for now
|
|
node.is_end_of_key = false;
|
|
node.value = Vec::new();
|
|
tree.save_node(Some(node_id), &node)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Lists all keys with a given prefix.
|
|
pub fn list(tree: &mut TST, prefix: &str) -> Result<Vec<String>, Error> {
|
|
let root_id = match tree.root_id {
|
|
Some(id) => id,
|
|
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
|
|
};
|
|
|
|
let mut result = Vec::new();
|
|
|
|
// Handle empty prefix case - will return all keys
|
|
if prefix.is_empty() {
|
|
collect_all_keys(tree, root_id, String::new(), &mut result)?;
|
|
return Ok(result);
|
|
}
|
|
|
|
// Find the node corresponding to the prefix
|
|
let chars: Vec<char> = prefix.chars().collect();
|
|
let node_id = match find_prefix_node(tree, root_id, &chars, 0) {
|
|
Ok(id) => id,
|
|
Err(_) => return Ok(Vec::new()), // Prefix not found, return empty list
|
|
};
|
|
|
|
// Collect all keys from the subtree
|
|
collect_keys_with_prefix(tree, node_id, prefix.to_string(), &mut result)?;
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
/// Finds the node corresponding to a prefix.
|
|
fn find_prefix_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
|
|
if pos >= chars.len() {
|
|
return Ok(node_id);
|
|
}
|
|
|
|
let node = tree.get_node(node_id)?;
|
|
let current_char = chars[pos];
|
|
|
|
if current_char < node.character {
|
|
// Go left
|
|
if let Some(left_id) = node.left_id {
|
|
find_prefix_node(tree, left_id, chars, pos)
|
|
} else {
|
|
Err(Error::PrefixNotFound(chars.iter().collect()))
|
|
}
|
|
} else if current_char > node.character {
|
|
// Go right
|
|
if let Some(right_id) = node.right_id {
|
|
find_prefix_node(tree, right_id, chars, pos)
|
|
} else {
|
|
Err(Error::PrefixNotFound(chars.iter().collect()))
|
|
}
|
|
} else {
|
|
// Character matches
|
|
if pos + 1 >= chars.len() {
|
|
// This is the last character of the prefix
|
|
Ok(node_id)
|
|
} else if let Some(middle_id) = node.middle_id {
|
|
// Go to next character
|
|
find_prefix_node(tree, middle_id, chars, pos + 1)
|
|
} else {
|
|
Err(Error::PrefixNotFound(chars.iter().collect()))
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Collects all keys with a given prefix.
|
|
fn collect_keys_with_prefix(
|
|
tree: &mut TST,
|
|
node_id: u32,
|
|
current_path: String,
|
|
result: &mut Vec<String>,
|
|
) -> Result<(), Error> {
|
|
let node = tree.get_node(node_id)?;
|
|
|
|
// If this node is an end of key, add it to the result
|
|
if node.is_end_of_key {
|
|
result.push(current_path.clone());
|
|
}
|
|
|
|
// Recursively collect keys from all children
|
|
if let Some(left_id) = node.left_id {
|
|
collect_all_keys(tree, left_id, current_path.clone(), result)?;
|
|
}
|
|
|
|
if let Some(middle_id) = node.middle_id {
|
|
let mut new_path = current_path.clone();
|
|
new_path.push(node.character);
|
|
collect_all_keys(tree, middle_id, new_path, result)?;
|
|
}
|
|
|
|
if let Some(right_id) = node.right_id {
|
|
collect_all_keys(tree, right_id, current_path.clone(), result)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Recursively collects all keys under a node.
|
|
fn collect_all_keys(
|
|
tree: &mut TST,
|
|
node_id: u32,
|
|
current_path: String,
|
|
result: &mut Vec<String>,
|
|
) -> Result<(), Error> {
|
|
let node = tree.get_node(node_id)?;
|
|
|
|
let mut new_path = current_path.clone();
|
|
new_path.push(node.character);
|
|
|
|
// If this node is an end of key, add it to the result
|
|
if node.is_end_of_key {
|
|
result.push(new_path.clone());
|
|
}
|
|
|
|
// Recursively collect keys from all children
|
|
if let Some(left_id) = node.left_id {
|
|
collect_all_keys(tree, left_id, current_path.clone(), result)?;
|
|
}
|
|
|
|
if let Some(middle_id) = node.middle_id {
|
|
collect_all_keys(tree, middle_id, new_path.clone(), result)?;
|
|
}
|
|
|
|
if let Some(right_id) = node.right_id {
|
|
collect_all_keys(tree, right_id, current_path.clone(), result)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Gets all values for keys with a given prefix.
|
|
pub fn getall(tree: &mut TST, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
|
|
// Get all matching keys
|
|
let keys = list(tree, prefix)?;
|
|
|
|
// Get values for each key
|
|
let mut values = Vec::new();
|
|
for key in keys {
|
|
if let Ok(value) = get(tree, &key) {
|
|
values.push(value);
|
|
}
|
|
}
|
|
|
|
Ok(values)
|
|
}
|
|
|
|
impl TST {
|
|
/// Helper function to get a node from the database.
|
|
pub(crate) fn get_node(&mut self, node_id: u32) -> Result<TSTNode, Error> {
|
|
let data = self.db.get(node_id)?;
|
|
TSTNode::deserialize(&data)
|
|
}
|
|
|
|
/// Helper function to save a node to the database.
|
|
pub(crate) fn save_node(&mut self, node_id: Option<u32>, node: &TSTNode) -> Result<u32, Error> {
|
|
let data = node.serialize();
|
|
let args = OurDBSetArgs {
|
|
id: node_id,
|
|
data: &data,
|
|
};
|
|
Ok(self.db.set(args)?)
|
|
}
|
|
} |