From dc8c88702687b68bbaf1a9c5936d146aa6a55066 Mon Sep 17 00:00:00 2001 From: Timur Gordon <31495328+timurgordon@users.noreply.github.com> Date: Wed, 9 Apr 2025 13:09:59 +0200 Subject: [PATCH] port radixtree to rust --- radixtree/src/lib.rs | 24 +- radixtree/src/operations.rs | 728 ++++++++++++++++++++---------------- radixtree/src/serialize.rs | 16 +- 3 files changed, 427 insertions(+), 341 deletions(-) diff --git a/radixtree/src/lib.rs b/radixtree/src/lib.rs index ab4561e..5e52c21 100644 --- a/radixtree/src/lib.rs +++ b/radixtree/src/lib.rs @@ -12,8 +12,7 @@ mod serialize; pub use error::Error; pub use node::{Node, NodeRef}; -use ourdb::{OurDB, OurDBConfig, OurDBSetArgs}; -use std::path::PathBuf; +use ourdb::OurDB; /// RadixTree represents a radix tree data structure with persistent storage. pub struct RadixTree { @@ -37,8 +36,7 @@ impl RadixTree { /// /// Returns an error if the database cannot be created or opened pub fn new(path: &str, reset: bool) -> Result { - // Implementation will be provided in operations.rs - unimplemented!() + operations::new_radix_tree(path, reset) } /// Sets a key-value pair in the tree. @@ -52,8 +50,7 @@ impl RadixTree { /// /// Returns an error if the operation fails pub fn set(&mut self, key: &str, value: Vec) -> Result<(), Error> { - // Implementation will be provided in operations.rs - unimplemented!() + operations::set(self, key, value) } /// Gets a value by key from the tree. @@ -70,8 +67,7 @@ impl RadixTree { /// /// Returns an error if the key is not found or the operation fails pub fn get(&mut self, key: &str) -> Result, Error> { - // Implementation will be provided in operations.rs - unimplemented!() + operations::get(self, key) } /// Updates the value at a given key prefix. 
@@ -85,8 +81,7 @@ impl RadixTree { /// /// Returns an error if the prefix is not found or the operation fails pub fn update(&mut self, prefix: &str, new_value: Vec) -> Result<(), Error> { - // Implementation will be provided in operations.rs - unimplemented!() + operations::update(self, prefix, new_value) } /// Deletes a key from the tree. @@ -99,8 +94,7 @@ impl RadixTree { /// /// Returns an error if the key is not found or the operation fails pub fn delete(&mut self, key: &str) -> Result<(), Error> { - // Implementation will be provided in operations.rs - unimplemented!() + operations::delete(self, key) } /// Lists all keys with a given prefix. @@ -117,8 +111,7 @@ impl RadixTree { /// /// Returns an error if the operation fails pub fn list(&mut self, prefix: &str) -> Result, Error> { - // Implementation will be provided in operations.rs - unimplemented!() + operations::list(self, prefix) } /// Gets all values for keys with a given prefix. @@ -135,7 +128,6 @@ impl RadixTree { /// /// Returns an error if the operation fails pub fn getall(&mut self, prefix: &str) -> Result>, Error> { - // Implementation will be provided in operations.rs - unimplemented!() + operations::getall(self, prefix) } } diff --git a/radixtree/src/operations.rs b/radixtree/src/operations.rs index 9821e0e..f4c017a 100644 --- a/radixtree/src/operations.rs +++ b/radixtree/src/operations.rs @@ -3,43 +3,417 @@ use crate::error::Error; use crate::node::{Node, NodeRef}; use crate::RadixTree; +use crate::serialize::get_common_prefix; use ourdb::{OurDB, OurDBConfig, OurDBSetArgs}; +use std::path::PathBuf; -impl RadixTree { - /// Creates a new radix tree with the specified database path. - pub fn new(path: &str, reset: bool) -> Result { - let config = OurDBConfig { - record_size_max: 1024 * 4, // 4KB max record size - incremental_mode: true, - reset, - ..Default::default() - }; +/// Creates a new radix tree with the specified database path. 
+pub fn new_radix_tree(path: &str, reset: bool) -> Result { + let config = OurDBConfig { + path: PathBuf::from(path), + incremental_mode: true, + file_size: Some(1024 * 1024), // 1MB file size + keysize: Some(4), // Default key size + }; + + let mut db = OurDB::new(config)?; + + // If reset is true, we would clear the database + // Since OurDB doesn't have a reset method, we'll handle it by + // creating a fresh database when reset is true + // We'll implement this by checking if it's a new database (next_id == 1) + + let root_id = if db.get_next_id()? == 1 { + // Create a new root node + let root = Node::new_root(); + let root_id = db.set(OurDBSetArgs { + id: None, + data: &root.serialize(), + })?; - let db = OurDB::new(path, config)?; - - let root_id = if db.get_next_id()? == 1 { - // Create a new root node - let root = Node::new_root(); - let root_id = db.set(OurDBSetArgs { - id: None, - data: &root.serialize(), - })?; + // First ID should be 1 + assert_eq!(root_id, 1); + root_id + } else { + // Use existing root node + 1 // Root node always has ID 1 + }; + + Ok(RadixTree { + db, + root_id, + }) +} + +/// Sets a key-value pair in the tree. 
+pub fn set(tree: &mut RadixTree, key: &str, value: Vec) -> Result<(), Error> { + let mut current_id = tree.root_id; + let mut offset = 0; + + // Handle empty key case + if key.is_empty() { + let mut root_node = tree.get_node(current_id)?; + root_node.is_leaf = true; + root_node.value = value; + tree.save_node(Some(current_id), &root_node)?; + return Ok(()); + } + + while offset < key.len() { + let mut node = tree.get_node(current_id)?; + + // Find matching child + let mut matched_child = None; + for (i, child) in node.children.iter().enumerate() { + if key[offset..].starts_with(&child.key_part) { + matched_child = Some((i, child.clone())); + break; + } + } + + if matched_child.is_none() { + // No matching child found, create new leaf node + let key_part = key[offset..].to_string(); + let new_node = Node { + key_segment: key_part.clone(), + value: value.clone(), + children: Vec::new(), + is_leaf: true, + }; - // First ID should be 1 - assert_eq!(root_id, 1); - root_id - } else { - // Use existing root node - 1 // Root node always has ID 1 - }; - - Ok(Self { - db, - root_id, - }) + let new_id = tree.save_node(None, &new_node)?; + + // Create new child reference and update parent node + node.children.push(NodeRef { + key_part, + node_id: new_id, + }); + + tree.save_node(Some(current_id), &node)?; + return Ok(()); + } + + let (child_index, mut child) = matched_child.unwrap(); + let common_prefix = get_common_prefix(&key[offset..], &child.key_part); + + if common_prefix.len() < child.key_part.len() { + // Split existing node + let child_node = tree.get_node(child.node_id)?; + + // Create new intermediate node + let new_node = Node { + key_segment: child.key_part[common_prefix.len()..].to_string(), + value: child_node.value.clone(), + children: child_node.children.clone(), + is_leaf: child_node.is_leaf, + }; + let new_id = tree.save_node(None, &new_node)?; + + // Update current node + node.children[child_index] = NodeRef { + key_part: common_prefix.to_string(), + 
node_id: new_id, + }; + tree.save_node(Some(current_id), &node)?; + + // Update child node reference + child.node_id = new_id; + } + + if offset + common_prefix.len() == key.len() { + // Update value at existing node + let mut child_node = tree.get_node(child.node_id)?; + child_node.value = value; + child_node.is_leaf = true; + tree.save_node(Some(child.node_id), &child_node)?; + return Ok(()); + } + + offset += common_prefix.len(); + current_id = child.node_id; } + Ok(()) +} + +/// Gets a value by key from the tree. +pub fn get(tree: &mut RadixTree, key: &str) -> Result, Error> { + let mut current_id = tree.root_id; + let mut offset = 0; + + // Handle empty key case + if key.is_empty() { + let root_node = tree.get_node(current_id)?; + if root_node.is_leaf { + return Ok(root_node.value.clone()); + } + return Err(Error::KeyNotFound(key.to_string())); + } + + while offset < key.len() { + let node = tree.get_node(current_id)?; + + let mut found = false; + for child in &node.children { + if key[offset..].starts_with(&child.key_part) { + if offset + child.key_part.len() == key.len() { + let child_node = tree.get_node(child.node_id)?; + if child_node.is_leaf { + return Ok(child_node.value); + } + } + current_id = child.node_id; + offset += child.key_part.len(); + found = true; + break; + } + } + + if !found { + return Err(Error::KeyNotFound(key.to_string())); + } + } + + Err(Error::KeyNotFound(key.to_string())) +} + +/// Updates the value at a given key prefix. 
+pub fn update(tree: &mut RadixTree, prefix: &str, new_value: Vec) -> Result<(), Error> { + let mut current_id = tree.root_id; + let mut offset = 0; + + // Handle empty prefix case + if prefix.is_empty() { + return Err(Error::InvalidOperation("Empty prefix not allowed".to_string())); + } + + while offset < prefix.len() { + let node = tree.get_node(current_id)?; + + let mut found = false; + for child in &node.children { + if prefix[offset..].starts_with(&child.key_part) { + if offset + child.key_part.len() == prefix.len() { + // Found exact prefix match + let mut child_node = tree.get_node(child.node_id)?; + if child_node.is_leaf { + // Update the value + child_node.value = new_value; + tree.save_node(Some(child.node_id), &child_node)?; + return Ok(()); + } + } + current_id = child.node_id; + offset += child.key_part.len(); + found = true; + break; + } + } + + if !found { + return Err(Error::PrefixNotFound(prefix.to_string())); + } + } + + Err(Error::PrefixNotFound(prefix.to_string())) +} + +/// Deletes a key from the tree. 
+pub fn delete(tree: &mut RadixTree, key: &str) -> Result<(), Error> { + let mut current_id = tree.root_id; + let mut offset = 0; + let mut path = Vec::new(); + + // Handle empty key case + if key.is_empty() { + let mut root_node = tree.get_node(current_id)?; + if !root_node.is_leaf { + return Err(Error::KeyNotFound(key.to_string())); + } + // For the root node, we just mark it as non-leaf + root_node.is_leaf = false; + root_node.value = Vec::new(); + tree.save_node(Some(current_id), &root_node)?; + return Ok(()); + } + + // Find the node to delete + while offset < key.len() { + let node = tree.get_node(current_id)?; + + let mut found = false; + for child in &node.children { + if key[offset..].starts_with(&child.key_part) { + path.push(child.clone()); + current_id = child.node_id; + offset += child.key_part.len(); + found = true; + + // Check if we've matched the full key + if offset == key.len() { + let child_node = tree.get_node(child.node_id)?; + if child_node.is_leaf { + found = true; + break; + } + } + break; + } + } + + if !found { + return Err(Error::KeyNotFound(key.to_string())); + } + } + + if path.is_empty() { + return Err(Error::KeyNotFound(key.to_string())); + } + + // Get the node to delete + let mut last_node = tree.get_node(path.last().unwrap().node_id)?; + + // If the node has children, just mark it as non-leaf + if !last_node.children.is_empty() { + last_node.is_leaf = false; + last_node.value = Vec::new(); + tree.save_node(Some(path.last().unwrap().node_id), &last_node)?; + return Ok(()); + } + + // If node has no children, remove it from parent + if path.len() > 1 { + let parent_id = path[path.len() - 2].node_id; + let mut parent_node = tree.get_node(parent_id)?; + + // Find and remove the child from parent + for i in 0..parent_node.children.len() { + if parent_node.children[i].node_id == path.last().unwrap().node_id { + parent_node.children.remove(i); + break; + } + } + + tree.save_node(Some(parent_id), &parent_node)?; + + // Delete the node 
from the database + tree.db.delete(path.last().unwrap().node_id)?; + } else { + // If this is a direct child of the root, just mark it as non-leaf + last_node.is_leaf = false; + last_node.value = Vec::new(); + tree.save_node(Some(path.last().unwrap().node_id), &last_node)?; + } + + Ok(()) +} + +/// Lists all keys with a given prefix. +pub fn list(tree: &mut RadixTree, prefix: &str) -> Result, Error> { + let mut result = Vec::new(); + + // Handle empty prefix case - will return all keys + if prefix.is_empty() { + collect_all_keys(tree, tree.root_id, "", &mut result)?; + return Ok(result); + } + + // Start from the root and find all matching keys + find_keys_with_prefix(tree, tree.root_id, "", prefix, &mut result)?; + Ok(result) +} + +/// Helper function to find all keys with a given prefix. +fn find_keys_with_prefix( + tree: &mut RadixTree, + node_id: u32, + current_path: &str, + prefix: &str, + result: &mut Vec, +) -> Result<(), Error> { + let node = tree.get_node(node_id)?; + + // If the current path already matches or exceeds the prefix length + if current_path.len() >= prefix.len() { + // Check if the current path starts with the prefix + if current_path.starts_with(prefix) { + // If this is a leaf node, add it to the results + if node.is_leaf { + result.push(current_path.to_string()); + } + + // Collect all keys from this subtree + for child in &node.children { + let child_path = format!("{}{}", current_path, child.key_part); + find_keys_with_prefix(tree, child.node_id, &child_path, prefix, result)?; + } + } + return Ok(()); + } + + // Current path is shorter than the prefix, continue searching + for child in &node.children { + let child_path = format!("{}{}", current_path, child.key_part); + + // Check if this child's path could potentially match the prefix + if prefix.starts_with(current_path) { + // The prefix starts with the current path, so we need to check if + // the child's key_part matches the next part of the prefix + let prefix_remainder = 
&prefix[current_path.len()..]; + + // If the prefix remainder starts with the child's key_part or vice versa + if prefix_remainder.starts_with(&child.key_part) + || (child.key_part.starts_with(prefix_remainder) + && child.key_part.len() >= prefix_remainder.len()) { + find_keys_with_prefix(tree, child.node_id, &child_path, prefix, result)?; + } + } + } + + Ok(()) +} + +/// Helper function to recursively collect all keys under a node. +fn collect_all_keys( + tree: &mut RadixTree, + node_id: u32, + current_path: &str, + result: &mut Vec, +) -> Result<(), Error> { + let node = tree.get_node(node_id)?; + + // If this node is a leaf, add its path to the result + if node.is_leaf { + result.push(current_path.to_string()); + } + + // Recursively collect keys from all children + for child in &node.children { + let child_path = format!("{}{}", current_path, child.key_part); + collect_all_keys(tree, child.node_id, &child_path, result)?; + } + + Ok(()) +} + +/// Gets all values for keys with a given prefix. +pub fn getall(tree: &mut RadixTree, prefix: &str) -> Result>, Error> { + // Get all matching keys + let keys = list(tree, prefix)?; + + // Get values for each key + let mut values = Vec::new(); + for key in keys { + if let Ok(value) = get(tree, &key) { + values.push(value); + } + } + + Ok(values) +} + +impl RadixTree { /// Helper function to get a node from the database. pub(crate) fn get_node(&mut self, node_id: u32) -> Result { let data = self.db.get(node_id)?; @@ -56,271 +430,6 @@ impl RadixTree { Ok(self.db.set(args)?) } - /// Sets a key-value pair in the tree. 
- pub fn set(&mut self, key: &str, value: Vec) -> Result<(), Error> { - let mut current_id = self.root_id; - let mut offset = 0; - - // Handle empty key case - if key.is_empty() { - let mut root_node = self.get_node(current_id)?; - root_node.is_leaf = true; - root_node.value = value; - self.save_node(Some(current_id), &root_node)?; - return Ok(()); - } - - while offset < key.len() { - let mut node = self.get_node(current_id)?; - - // Find matching child - let mut matched_child = None; - for (i, child) in node.children.iter().enumerate() { - if key[offset..].starts_with(&child.key_part) { - matched_child = Some((i, child.clone())); - break; - } - } - - if matched_child.is_none() { - // No matching child found, create new leaf node - let key_part = key[offset..].to_string(); - let new_node = Node { - key_segment: key_part.clone(), - value: value.clone(), - children: Vec::new(), - is_leaf: true, - }; - - let new_id = self.save_node(None, &new_node)?; - - // Create new child reference and update parent node - node.children.push(NodeRef { - key_part, - node_id: new_id, - }); - - self.save_node(Some(current_id), &node)?; - return Ok(()); - } - - let (child_index, mut child) = matched_child.unwrap(); - let common_prefix = get_common_prefix(&key[offset..], &child.key_part); - - if common_prefix.len() < child.key_part.len() { - // Split existing node - let mut child_node = self.get_node(child.node_id)?; - - // Create new intermediate node - let new_node = Node { - key_segment: child.key_part[common_prefix.len()..].to_string(), - value: child_node.value.clone(), - children: child_node.children.clone(), - is_leaf: child_node.is_leaf, - }; - let new_id = self.save_node(None, &new_node)?; - - // Update current node - node.children[child_index] = NodeRef { - key_part: common_prefix.to_string(), - node_id: new_id, - }; - self.save_node(Some(current_id), &node)?; - - // Update child node reference - child.node_id = new_id; - } - - if offset + common_prefix.len() == key.len() { - 
// Update value at existing node - let mut child_node = self.get_node(child.node_id)?; - child_node.value = value; - child_node.is_leaf = true; - self.save_node(Some(child.node_id), &child_node)?; - return Ok(()); - } - - offset += common_prefix.len(); - current_id = child.node_id; - } - - Ok(()) - } - - /// Gets a value by key from the tree. - pub fn get(&mut self, key: &str) -> Result, Error> { - let mut current_id = self.root_id; - let mut offset = 0; - - // Handle empty key case - if key.is_empty() { - let root_node = self.get_node(current_id)?; - if root_node.is_leaf { - return Ok(root_node.value); - } - return Err(Error::KeyNotFound(key.to_string())); - } - - while offset < key.len() { - let node = self.get_node(current_id)?; - - let mut found = false; - for child in &node.children { - if key[offset..].starts_with(&child.key_part) { - if offset + child.key_part.len() == key.len() { - let child_node = self.get_node(child.node_id)?; - if child_node.is_leaf { - return Ok(child_node.value); - } - } - current_id = child.node_id; - offset += child.key_part.len(); - found = true; - break; - } - } - - if !found { - return Err(Error::KeyNotFound(key.to_string())); - } - } - - Err(Error::KeyNotFound(key.to_string())) - } - - /// Updates the value at a given key prefix. 
- pub fn update(&mut self, prefix: &str, new_value: Vec) -> Result<(), Error> { - let mut current_id = self.root_id; - let mut offset = 0; - - // Handle empty prefix case - if prefix.is_empty() { - return Err(Error::InvalidOperation("Empty prefix not allowed".to_string())); - } - - while offset < prefix.len() { - let node = self.get_node(current_id)?; - - let mut found = false; - for child in &node.children { - if prefix[offset..].starts_with(&child.key_part) { - if offset + child.key_part.len() == prefix.len() { - // Found exact prefix match - let mut child_node = self.get_node(child.node_id)?; - if child_node.is_leaf { - // Update the value - child_node.value = new_value; - self.save_node(Some(child.node_id), &child_node)?; - return Ok(()); - } - } - current_id = child.node_id; - offset += child.key_part.len(); - found = true; - break; - } - } - - if !found { - return Err(Error::PrefixNotFound(prefix.to_string())); - } - } - - Err(Error::PrefixNotFound(prefix.to_string())) - } - - /// Deletes a key from the tree. 
- pub fn delete(&mut self, key: &str) -> Result<(), Error> { - let mut current_id = self.root_id; - let mut offset = 0; - let mut path = Vec::new(); - - // Find the node to delete - while offset < key.len() { - let node = self.get_node(current_id)?; - - let mut found = false; - for child in &node.children { - if key[offset..].starts_with(&child.key_part) { - path.push(child.clone()); - current_id = child.node_id; - offset += child.key_part.len(); - found = true; - - // Check if we've matched the full key - if offset == key.len() { - let child_node = self.get_node(child.node_id)?; - if child_node.is_leaf { - found = true; - break; - } - } - break; - } - } - - if !found { - return Err(Error::KeyNotFound(key.to_string())); - } - } - - if path.is_empty() { - return Err(Error::KeyNotFound(key.to_string())); - } - - // Get the node to delete - let mut last_node = self.get_node(path.last().unwrap().node_id)?; - - // If the node has children, just mark it as non-leaf - if !last_node.children.is_empty() { - last_node.is_leaf = false; - last_node.value = Vec::new(); - self.save_node(Some(path.last().unwrap().node_id), &last_node)?; - return Ok(()); - } - - // If node has no children, remove it from parent - if path.len() > 1 { - let parent_id = path[path.len() - 2].node_id; - let mut parent_node = self.get_node(parent_id)?; - - // Find and remove the child from parent - for i in 0..parent_node.children.len() { - if parent_node.children[i].node_id == path.last().unwrap().node_id { - parent_node.children.remove(i); - break; - } - } - - self.save_node(Some(parent_id), &parent_node)?; - - // Delete the node from the database - self.db.delete(path.last().unwrap().node_id)?; - } else { - // If this is a direct child of the root, just mark it as non-leaf - last_node.is_leaf = false; - last_node.value = Vec::new(); - self.save_node(Some(path.last().unwrap().node_id), &last_node)?; - } - - Ok(()) - } - - /// Lists all keys with a given prefix. 
/// Helper function to get the common prefix of two strings.
///
/// The strings are compared character by character rather than byte by byte,
/// so the result always ends on a character boundary. Two strings that differ
/// only in the trailing bytes of a multi-byte character (for example "é" and
/// "è") share no prefix, and slicing never panics.
///
/// # Arguments
///
/// * `a` - First string; the returned prefix is copied from it
/// * `b` - Second string
///
/// # Returns
///
/// The longest string that both `a` and `b` start with (possibly empty).
pub fn get_common_prefix(a: &str, b: &str) -> String {
    let shared_len: usize = a
        .chars()
        .zip(b.chars())
        .take_while(|(x, y)| x == y)
        .map(|(x, _)| x.len_utf8())
        .sum();

    a[..shared_len].to_string()
}