db/tst/src/operations.rs
2025-04-20 06:34:31 +02:00

418 lines
13 KiB
Rust

//! Implementation of TST operations.
use crate::error::Error;
use crate::node::TSTNode;
use crate::TST;
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
/// Opens (or creates) a TST whose nodes are persisted in an OurDB at `path`.
///
/// When `reset` is `true` and `path` already exists, the directory is removed
/// before being recreated, so the tree starts out empty. A fresh root node is
/// written when the database reports `1` as its next id or when `reset` is
/// set; otherwise the existing root is reused.
///
/// # Errors
///
/// Propagates failures from removing or creating the directory, from opening
/// the database, from querying its next id, and from writing the root node.
pub fn new_tst(path: &str, reset: bool) -> Result<TST, Error> {
    let db_dir = PathBuf::from(path);

    // Wipe any previous contents when a reset was requested.
    if db_dir.exists() && reset {
        std::fs::remove_dir_all(&db_dir)?;
    }
    // Make sure the directory exists before the database opens it.
    std::fs::create_dir_all(&db_dir)?;

    let mut db = OurDB::new(OurDBConfig {
        path: db_dir,
        incremental_mode: true,
        // 1024 * 1024 = 1_048_576 (1 MiB if the unit is bytes), not 10 MB.
        file_size: Some(1024 * 1024),
        keysize: Some(4),
    })?;

    // The next-id query always runs first, exactly like the original condition.
    let needs_fresh_root = db.get_next_id()? == 1 || reset;
    let root_id = if needs_fresh_root {
        let root = TSTNode::new_root();
        db.set(OurDBSetArgs {
            id: None,
            data: &root.serialize(),
        })?
    } else {
        // An existing database is assumed to keep its root node under id 1.
        1
    };

    Ok(TST {
        db,
        root_id: Some(root_id),
    })
}
/// Sets a key-value pair in the tree.
pub fn set(tree: &mut TST, key: &str, value: Vec<u8>) -> Result<(), Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
set_recursive(tree, root_id, &chars, 0, value)?;
Ok(())
}
/// Recursive helper that inserts `chars[pos..]` into the subtree rooted at
/// `node_id` and stores `value` on the node of the final character.
///
/// Returns the id reported by the last `save_node` call, i.e. the id of the
/// node that now carries the value.
///
/// A node is treated as an unclaimed placeholder only when its character is
/// `'\0'` *and* it has no children *and* it does not terminate a key. Checking
/// the character alone would make every node reached through a key containing
/// `'\0'` look empty on later inserts, and claiming it again would overwrite
/// its `middle_id` and orphan the keys stored below it.
///
/// # Errors
///
/// Propagates any error from loading or saving nodes.
fn set_recursive(tree: &mut TST, node_id: u32, chars: &[char], pos: usize, value: Vec<u8>) -> Result<u32, Error> {
    let mut node = tree.get_node(node_id)?;

    // Position past the end of the key: terminate the key on this node.
    if pos >= chars.len() {
        node.is_end_of_key = true;
        node.value = value;
        return tree.save_node(Some(node_id), &node);
    }

    let current_char = chars[pos];

    // Claim a blank placeholder node (presumably what `TSTNode::new_root`
    // produces - confirm) for the current character. It then falls through to
    // the "character matches" path below, which persists the change with a
    // single save instead of the former save / reload / save sequence.
    let is_unclaimed = node.character == '\0'
        && !node.is_end_of_key
        && node.left_id.is_none()
        && node.middle_id.is_none()
        && node.right_id.is_none();
    if is_unclaimed {
        node.character = current_char;
    }

    if current_char < node.character {
        // Smaller character: continue in (or create) the left sibling chain.
        let left_id = match node.left_id {
            Some(id) => id,
            None => {
                let id = tree.save_node(None, &TSTNode::new(current_char, Vec::new(), false))?;
                node.left_id = Some(id);
                tree.save_node(Some(node_id), &node)?;
                id
            }
        };
        return set_recursive(tree, left_id, chars, pos, value);
    }

    if current_char > node.character {
        // Larger character: continue in (or create) the right sibling chain.
        let right_id = match node.right_id {
            Some(id) => id,
            None => {
                let id = tree.save_node(None, &TSTNode::new(current_char, Vec::new(), false))?;
                node.right_id = Some(id);
                tree.save_node(Some(node_id), &node)?;
                id
            }
        };
        return set_recursive(tree, right_id, chars, pos, value);
    }

    // The character matches this node.
    if pos + 1 >= chars.len() {
        // Last character of the key: the value lives on this node.
        node.is_end_of_key = true;
        node.value = value;
        return tree.save_node(Some(node_id), &node);
    }

    // More characters follow: descend into (or create) the middle child,
    // which represents the next position of the key.
    let middle_id = match node.middle_id {
        Some(id) => id,
        None => {
            let id = tree.save_node(None, &TSTNode::new(chars[pos + 1], Vec::new(), false))?;
            node.middle_id = Some(id);
            tree.save_node(Some(node_id), &node)?;
            id
        }
    };
    set_recursive(tree, middle_id, chars, pos + 1, value)
}
/// Gets a value by key from the tree.
pub fn get(tree: &mut TST, key: &str) -> Result<Vec<u8>, Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
let node_id = find_node(tree, root_id, &chars, 0)?;
let node = tree.get_node(node_id)?;
if node.is_end_of_key {
Ok(node.value.clone())
} else {
Err(Error::KeyNotFound(key.to_string()))
}
}
/// Finds a node by key.
fn find_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
let node = tree.get_node(node_id)?;
if pos >= chars.len() {
return Ok(node_id);
}
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
find_node(tree, left_id, chars, pos)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
} else if current_char > node.character {
// Go right
if let Some(right_id) = node.right_id {
find_node(tree, right_id, chars, pos)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
} else {
// Character matches
if pos + 1 >= chars.len() {
// This is the last character
Ok(node_id)
} else if let Some(middle_id) = node.middle_id {
// Go to next character
find_node(tree, middle_id, chars, pos + 1)
} else {
Err(Error::KeyNotFound(chars.iter().collect()))
}
}
}
/// Deletes a key from the tree.
pub fn delete(tree: &mut TST, key: &str) -> Result<(), Error> {
if key.is_empty() {
return Err(Error::InvalidOperation("Empty key not allowed".to_string()));
}
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let chars: Vec<char> = key.chars().collect();
let node_id = find_node(tree, root_id, &chars, 0)?;
let mut node = tree.get_node(node_id)?;
if !node.is_end_of_key {
return Err(Error::KeyNotFound(key.to_string()));
}
// If the node has a middle child, just mark it as not end of key
if node.middle_id.is_some() || node.left_id.is_some() || node.right_id.is_some() {
node.is_end_of_key = false;
node.value = Vec::new();
tree.save_node(Some(node_id), &node)?;
return Ok(());
}
// Otherwise, we need to remove the node and update its parent
// This is more complex and would require tracking the path to the node
// For simplicity, we'll just mark it as not end of key for now
node.is_end_of_key = false;
node.value = Vec::new();
tree.save_node(Some(node_id), &node)?;
Ok(())
}
/// Lists all keys with a given prefix.
pub fn list(tree: &mut TST, prefix: &str) -> Result<Vec<String>, Error> {
let root_id = match tree.root_id {
Some(id) => id,
None => return Err(Error::InvalidOperation("Tree not initialized".to_string())),
};
let mut result = Vec::new();
// Handle empty prefix case - will return all keys
if prefix.is_empty() {
collect_all_keys(tree, root_id, String::new(), &mut result)?;
return Ok(result);
}
// Find the node corresponding to the prefix
let chars: Vec<char> = prefix.chars().collect();
let node_id = match find_prefix_node(tree, root_id, &chars, 0) {
Ok(id) => id,
Err(_) => return Ok(Vec::new()), // Prefix not found, return empty list
};
// Collect all keys from the subtree
collect_keys_with_prefix(tree, node_id, prefix.to_string(), &mut result)?;
Ok(result)
}
/// Finds the node corresponding to a prefix.
fn find_prefix_node(tree: &mut TST, node_id: u32, chars: &[char], pos: usize) -> Result<u32, Error> {
if pos >= chars.len() {
return Ok(node_id);
}
let node = tree.get_node(node_id)?;
let current_char = chars[pos];
if current_char < node.character {
// Go left
if let Some(left_id) = node.left_id {
find_prefix_node(tree, left_id, chars, pos)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
} else if current_char > node.character {
// Go right
if let Some(right_id) = node.right_id {
find_prefix_node(tree, right_id, chars, pos)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
} else {
// Character matches
if pos + 1 >= chars.len() {
// This is the last character of the prefix
Ok(node_id)
} else if let Some(middle_id) = node.middle_id {
// Go to next character
find_prefix_node(tree, middle_id, chars, pos + 1)
} else {
Err(Error::PrefixNotFound(chars.iter().collect()))
}
}
}
/// Collects every key that starts with `current_path` into `result`.
///
/// `node_id` must be the node matching the *last* character of the prefix
/// (what `find_prefix_node` returns), and `current_path` must be the complete
/// prefix, which therefore already ends with that node's character.
///
/// Only the middle subtree is visited. The left and right children of the
/// prefix node hold keys whose character at this position differs from the
/// prefix, so they can never match. The node's character is not appended
/// again, because `current_path` already contains it.
///
/// # Errors
///
/// Propagates any error raised while loading nodes.
fn collect_keys_with_prefix(
    tree: &mut TST,
    node_id: u32,
    current_path: String,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;

    // The prefix itself is a stored key.
    if node.is_end_of_key {
        result.push(current_path.clone());
    }

    // Every longer key sharing the prefix lives below the middle child.
    if let Some(middle_id) = node.middle_id {
        collect_all_keys(tree, middle_id, current_path, result)?;
    }

    Ok(())
}
/// Collects every key stored in the subtree rooted at `node_id` into `result`.
///
/// `current_path` holds the characters leading up to, but not including, the
/// node at `node_id`. Nodes are visited in pre-order (node, left, middle,
/// right), which fixes the order in which keys are appended.
///
/// # Errors
///
/// Propagates any error raised while loading nodes; keys gathered before the
/// failure remain in `result`.
fn collect_all_keys(
    tree: &mut TST,
    node_id: u32,
    current_path: String,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    // Explicit work stack of (node id, path leading up to that node).
    let mut pending: Vec<(u32, String)> = vec![(node_id, current_path)];

    while let Some((id, parent_path)) = pending.pop() {
        let node = tree.get_node(id)?;

        // Path including this node's own character.
        let mut own_path = parent_path.clone();
        own_path.push(node.character);

        if node.is_end_of_key {
            result.push(own_path.clone());
        }

        // Pushed in reverse so they pop as left, middle, right. Siblings
        // share the parent's path; only the middle child extends it.
        if let Some(right_id) = node.right_id {
            pending.push((right_id, parent_path.clone()));
        }
        if let Some(middle_id) = node.middle_id {
            pending.push((middle_id, own_path));
        }
        if let Some(left_id) = node.left_id {
            pending.push((left_id, parent_path));
        }
    }

    Ok(())
}
/// Returns the values of all keys that start with `prefix`.
///
/// Keys come from `list`; each one is then fetched with `get`. A key whose
/// lookup fails is skipped silently instead of aborting the whole call.
///
/// # Errors
///
/// Propagates any error returned by `list`.
pub fn getall(tree: &mut TST, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
    let values = list(tree, prefix)?
        .into_iter()
        // Best effort: drop keys whose value cannot be read.
        .filter_map(|key| get(tree, &key).ok())
        .collect();

    Ok(values)
}
impl TST {
    /// Loads the record stored under `node_id` and decodes it into a node.
    ///
    /// Errors from the database read and from decoding are both propagated.
    pub(crate) fn get_node(&mut self, node_id: u32) -> Result<TSTNode, Error> {
        let raw = self.db.get(node_id)?;
        let node = TSTNode::deserialize(&raw)?;
        Ok(node)
    }

    /// Serializes `node` and writes it to the database.
    ///
    /// `node_id` is passed through as the record id (`None` for a node that
    /// has no id yet). Returns the id reported by the database for the
    /// written record.
    pub(crate) fn save_node(&mut self, node_id: Option<u32>, node: &TSTNode) -> Result<u32, Error> {
        let stored_id = self.db.set(OurDBSetArgs {
            id: node_id,
            data: &node.serialize(),
        })?;
        Ok(stored_id)
    }
}