add data packages and remove empty submodule

This commit is contained in:
Timur Gordon
2025-08-07 12:13:37 +02:00
parent ca736d62f3
commit d7562ce466
47 changed files with 8639 additions and 1 deletions

View File

@@ -0,0 +1,35 @@
//! Error types for the RadixTree module.
use thiserror::Error;
/// Error type for RadixTree operations.
///
/// The `OurDB` and `IO` variants carry `#[from]`, so the wrapped error types
/// convert into this enum automatically when propagated with `?`.
#[derive(Debug, Error)]
pub enum Error {
/// Error from OurDB operations.
#[error("OurDB error: {0}")]
OurDB(#[from] ourdb::Error),
/// Error when a key is not found. The payload is the key that was looked up.
#[error("Key not found: {0}")]
KeyNotFound(String),
/// Error when a prefix is not found. The payload is the prefix that was looked up.
#[error("Prefix not found: {0}")]
PrefixNotFound(String),
/// Error during serialization. The payload is a human-readable description.
#[error("Serialization error: {0}")]
Serialization(String),
/// Error during deserialization. The payload is a human-readable description.
#[error("Deserialization error: {0}")]
Deserialization(String),
/// Error for invalid operations. The payload is a human-readable description.
#[error("Invalid operation: {0}")]
InvalidOperation(String),
/// Error for I/O operations.
#[error("I/O error: {0}")]
IO(#[from] std::io::Error),
}

View File

@@ -0,0 +1,133 @@
//! RadixTree is a space-optimized tree data structure that enables efficient string key operations
//! with persistent storage using OurDB as a backend.
//!
//! This implementation provides a persistent radix tree that can be used for efficient
//! prefix-based key operations, such as auto-complete, routing tables, and more.
mod error;
mod node;
mod operations;
mod serialize;
pub use error::Error;
pub use node::{Node, NodeRef};
use ourdb::OurDB;
/// RadixTree represents a radix tree data structure with persistent storage.
///
/// Every node of the tree is stored as one record in the backing database;
/// nodes refer to each other by database ID.
pub struct RadixTree {
/// Backing database holding the serialized nodes.
db: OurDB,
/// Database ID of the root node.
root_id: u32,
}
impl RadixTree {
/// Creates a new radix tree with the specified database path.
///
/// # Arguments
///
/// * `path` - The path to the database directory
/// * `reset` - Whether to reset the database if it exists
///
/// # Returns
///
/// A new `RadixTree` instance
///
/// # Errors
///
/// Returns an error if the database cannot be created or opened
pub fn new(path: &str, reset: bool) -> Result<Self, Error> {
operations::new_radix_tree(path, reset)
}
/// Sets a key-value pair in the tree.
///
/// An empty key is allowed; its value is stored on the root node.
///
/// # Arguments
///
/// * `key` - The key to set
/// * `value` - The value to set
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn set(&mut self, key: &str, value: Vec<u8>) -> Result<(), Error> {
operations::set(self, key, value)
}
/// Gets a value by key from the tree.
///
/// # Arguments
///
/// * `key` - The key to get
///
/// # Returns
///
/// The value associated with the key
///
/// # Errors
///
/// Returns `Error::KeyNotFound` if no value is stored under `key`, or another
/// error if the operation fails
pub fn get(&mut self, key: &str) -> Result<Vec<u8>, Error> {
operations::get(self, key)
}
/// Updates the value at a given key prefix.
///
/// The prefix has to identify a stored key exactly; no new key is created.
///
/// # Arguments
///
/// * `prefix` - The key prefix to update
/// * `new_value` - The new value to set
///
/// # Errors
///
/// Returns `Error::InvalidOperation` for an empty prefix, `Error::PrefixNotFound`
/// if the prefix is not found, or another error if the operation fails
pub fn update(&mut self, prefix: &str, new_value: Vec<u8>) -> Result<(), Error> {
operations::update(self, prefix, new_value)
}
/// Deletes a key from the tree.
///
/// # Arguments
///
/// * `key` - The key to delete
///
/// # Errors
///
/// Returns an error if the key is not found or the operation fails
pub fn delete(&mut self, key: &str) -> Result<(), Error> {
operations::delete(self, key)
}
/// Lists all keys with a given prefix.
///
/// An empty prefix lists every key in the tree.
///
/// # Arguments
///
/// * `prefix` - The prefix to search for
///
/// # Returns
///
/// A list of keys that start with the given prefix
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn list(&mut self, prefix: &str) -> Result<Vec<String>, Error> {
operations::list(self, prefix)
}
/// Gets all values for keys with a given prefix.
///
/// The matching keys are determined the same way as in [`RadixTree::list`].
///
/// # Arguments
///
/// * `prefix` - The prefix to search for
///
/// # Returns
///
/// A list of values for keys that start with the given prefix
///
/// # Errors
///
/// Returns an error if the operation fails
pub fn getall(&mut self, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
operations::getall(self, prefix)
}
}

View File

@@ -0,0 +1,59 @@
//! Node types for the RadixTree module.
/// A single radix-tree node in the form in which it is stored.
#[derive(Debug, Clone, PartialEq)]
pub struct Node {
    /// Portion of the key represented by this node.
    pub key_segment: String,
    /// Payload held by this node; left empty when the node is not a leaf.
    pub value: Vec<u8>,
    /// Links to the children of this node.
    pub children: Vec<NodeRef>,
    /// `true` when this node is a leaf node.
    pub is_leaf: bool,
}

/// Link from a parent node to one of its children in the database.
#[derive(Debug, Clone, PartialEq)]
pub struct NodeRef {
    /// Key segment associated with the referenced child.
    pub key_part: String,
    /// Database ID under which the child node is stored.
    pub node_id: u32,
}

impl Node {
    /// Builds a node without children from its key segment, value and leaf flag.
    pub fn new(key_segment: String, value: Vec<u8>, is_leaf: bool) -> Self {
        Node {
            is_leaf,
            children: Vec::new(),
            value,
            key_segment,
        }
    }

    /// Builds the root node: empty key segment, empty value, no children, not a leaf.
    pub fn new_root() -> Self {
        Self::new(String::new(), Vec::new(), false)
    }
}

impl NodeRef {
    /// Builds a reference to the child stored under `node_id` and reached via `key_part`.
    pub fn new(key_part: String, node_id: u32) -> Self {
        NodeRef { node_id, key_part }
    }
}

View File

@@ -0,0 +1,508 @@
//! Implementation of RadixTree operations.
use crate::error::Error;
use crate::node::{Node, NodeRef};
use crate::RadixTree;
use crate::serialize::get_common_prefix;
use ourdb::{OurDB, OurDBConfig, OurDBSetArgs};
use std::path::PathBuf;
/// Creates a new radix tree with the specified database path.
pub fn new_radix_tree(path: &str, reset: bool) -> Result<RadixTree, Error> {
let config = OurDBConfig {
path: PathBuf::from(path),
incremental_mode: true,
file_size: Some(1024 * 1024 * 10), // 10MB file size for better performance with large datasets
keysize: Some(6), // Use keysize=6 to support multiple files (file_nr + position)
reset: None, // Don't reset existing database
};
let mut db = OurDB::new(config)?;
// If reset is true, we would clear the database
// Since OurDB doesn't have a reset method, we'll handle it by
// creating a fresh database when reset is true
// We'll implement this by checking if it's a new database (next_id == 1)
let root_id = if db.get_next_id()? == 1 {
// Create a new root node
let root = Node::new_root();
let root_id = db.set(OurDBSetArgs {
id: None,
data: &root.serialize(),
})?;
// First ID should be 1
assert_eq!(root_id, 1);
root_id
} else {
// Use existing root node
1 // Root node always has ID 1
};
Ok(RadixTree {
db,
root_id,
})
}
/// Sets a key-value pair in the tree.
///
/// Walks down from the root, at each node following the first child whose
/// whole `key_part` is a prefix of the still-unmatched part of `key`:
///
/// * if the key ends exactly at that child, the child's value is replaced and
///   it is marked as a leaf;
/// * if no child matches, a new leaf holding the rest of the key is appended
///   to the current node.
///
/// A child is only followed when its entire `key_part` matches, so an existing
/// edge is never split; keys that merely share part of an edge become siblings.
/// (The former "split existing node" branch could never run for that reason
/// and has been removed.)
///
/// An empty key stores the value on the root node.
///
/// # Errors
///
/// Returns an error if a node cannot be loaded or saved.
pub fn set(tree: &mut RadixTree, key: &str, value: Vec<u8>) -> Result<(), Error> {
    let mut current_id = tree.root_id;
    let mut offset = 0;

    // The empty key has no path to walk: its value lives on the root itself.
    if key.is_empty() {
        let mut root_node = tree.get_node(current_id)?;
        root_node.is_leaf = true;
        root_node.value = value;
        tree.save_node(Some(current_id), &root_node)?;
        return Ok(());
    }

    while offset < key.len() {
        let mut node = tree.get_node(current_id)?;
        let remaining = &key[offset..];

        // Copy out the ID and edge length so `node` is free to be modified below.
        let matched = node
            .children
            .iter()
            .find(|child| remaining.starts_with(child.key_part.as_str()))
            .map(|child| (child.node_id, child.key_part.len()));

        let (child_id, matched_len) = match matched {
            Some(found) => found,
            None => {
                // Nothing to follow: hang the rest of the key off this node.
                let new_node = Node {
                    key_segment: remaining.to_string(),
                    value,
                    children: Vec::new(),
                    is_leaf: true,
                };
                let new_id = tree.save_node(None, &new_node)?;
                node.children.push(NodeRef {
                    key_part: new_node.key_segment,
                    node_id: new_id,
                });
                tree.save_node(Some(current_id), &node)?;
                return Ok(());
            }
        };

        if offset + matched_len == key.len() {
            // The key ends at this child: overwrite its value in place.
            let mut child_node = tree.get_node(child_id)?;
            child_node.value = value;
            child_node.is_leaf = true;
            tree.save_node(Some(child_id), &child_node)?;
            return Ok(());
        }

        offset += matched_len;
        current_id = child_id;
    }

    Ok(())
}
/// Gets a value by key from the tree.
pub fn get(tree: &mut RadixTree, key: &str) -> Result<Vec<u8>, Error> {
let mut current_id = tree.root_id;
let mut offset = 0;
// Handle empty key case
if key.is_empty() {
let root_node = tree.get_node(current_id)?;
if root_node.is_leaf {
return Ok(root_node.value.clone());
}
return Err(Error::KeyNotFound(key.to_string()));
}
while offset < key.len() {
let node = tree.get_node(current_id)?;
let mut found = false;
for child in &node.children {
if key[offset..].starts_with(&child.key_part) {
if offset + child.key_part.len() == key.len() {
let child_node = tree.get_node(child.node_id)?;
if child_node.is_leaf {
return Ok(child_node.value);
}
}
current_id = child.node_id;
offset += child.key_part.len();
found = true;
break;
}
}
if !found {
return Err(Error::KeyNotFound(key.to_string()));
}
}
Err(Error::KeyNotFound(key.to_string()))
}
/// Updates the value at a given key prefix.
pub fn update(tree: &mut RadixTree, prefix: &str, new_value: Vec<u8>) -> Result<(), Error> {
let mut current_id = tree.root_id;
let mut offset = 0;
// Handle empty prefix case
if prefix.is_empty() {
return Err(Error::InvalidOperation("Empty prefix not allowed".to_string()));
}
while offset < prefix.len() {
let node = tree.get_node(current_id)?;
let mut found = false;
for child in &node.children {
if prefix[offset..].starts_with(&child.key_part) {
if offset + child.key_part.len() == prefix.len() {
// Found exact prefix match
let mut child_node = tree.get_node(child.node_id)?;
if child_node.is_leaf {
// Update the value
child_node.value = new_value;
tree.save_node(Some(child.node_id), &child_node)?;
return Ok(());
}
}
current_id = child.node_id;
offset += child.key_part.len();
found = true;
break;
}
}
if !found {
return Err(Error::PrefixNotFound(prefix.to_string()));
}
}
Err(Error::PrefixNotFound(prefix.to_string()))
}
/// Deletes a key from the tree.
pub fn delete(tree: &mut RadixTree, key: &str) -> Result<(), Error> {
let mut current_id = tree.root_id;
let mut offset = 0;
let mut path = Vec::new();
// Handle empty key case
if key.is_empty() {
let mut root_node = tree.get_node(current_id)?;
if !root_node.is_leaf {
return Err(Error::KeyNotFound(key.to_string()));
}
// For the root node, we just mark it as non-leaf
root_node.is_leaf = false;
root_node.value = Vec::new();
tree.save_node(Some(current_id), &root_node)?;
return Ok(());
}
// Find the node to delete
while offset < key.len() {
let node = tree.get_node(current_id)?;
let mut found = false;
for child in &node.children {
if key[offset..].starts_with(&child.key_part) {
path.push(child.clone());
current_id = child.node_id;
offset += child.key_part.len();
found = true;
// Check if we've matched the full key
if offset == key.len() {
let child_node = tree.get_node(child.node_id)?;
if child_node.is_leaf {
found = true;
break;
}
}
break;
}
}
if !found {
return Err(Error::KeyNotFound(key.to_string()));
}
}
if path.is_empty() {
return Err(Error::KeyNotFound(key.to_string()));
}
// Get the node to delete
let mut last_node = tree.get_node(path.last().unwrap().node_id)?;
// If the node has children, just mark it as non-leaf
if !last_node.children.is_empty() {
last_node.is_leaf = false;
last_node.value = Vec::new();
tree.save_node(Some(path.last().unwrap().node_id), &last_node)?;
return Ok(());
}
// If node has no children, remove it from parent
if path.len() > 1 {
let parent_id = path[path.len() - 2].node_id;
let mut parent_node = tree.get_node(parent_id)?;
// Find and remove the child from parent
for i in 0..parent_node.children.len() {
if parent_node.children[i].node_id == path.last().unwrap().node_id {
parent_node.children.remove(i);
break;
}
}
tree.save_node(Some(parent_id), &parent_node)?;
// Delete the node from the database
tree.db.delete(path.last().unwrap().node_id)?;
} else {
// If this is a direct child of the root, just mark it as non-leaf
last_node.is_leaf = false;
last_node.value = Vec::new();
tree.save_node(Some(path.last().unwrap().node_id), &last_node)?;
}
Ok(())
}
/// Returns every stored key that starts with `prefix`.
///
/// An empty prefix yields all keys in the tree.
///
/// # Errors
///
/// Propagates any error raised while loading nodes.
pub fn list(tree: &mut RadixTree, prefix: &str) -> Result<Vec<String>, Error> {
    let root_id = tree.root_id;
    let mut keys = Vec::new();

    if prefix.is_empty() {
        // No filtering needed: walk the whole tree.
        collect_all_keys(tree, root_id, "", &mut keys)?;
    } else {
        // Descend from the root, following only branches compatible with the prefix.
        find_keys_with_prefix(tree, root_id, "", prefix, &mut keys)?;
    }

    Ok(keys)
}
/// Recursively gathers the keys below `node_id` that start with `prefix`.
///
/// `current_path` is the key text accumulated on the way from the root to
/// `node_id`. Matching keys are appended to `result` in traversal order.
fn find_keys_with_prefix(
    tree: &mut RadixTree,
    node_id: u32,
    current_path: &str,
    prefix: &str,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;

    if current_path.len() >= prefix.len() {
        // The path is at least as long as the prefix: either it matches, and
        // the whole subtree qualifies, or nothing below can match.
        if !current_path.starts_with(prefix) {
            return Ok(());
        }
        if node.is_leaf {
            result.push(current_path.to_owned());
        }
        for child in &node.children {
            let child_path = [current_path, child.key_part.as_str()].concat();
            find_keys_with_prefix(tree, child.node_id, &child_path, prefix, result)?;
        }
        return Ok(());
    }

    // The path is still shorter than the prefix. If it is not itself the
    // beginning of the prefix, no child can lead to a match.
    if !prefix.starts_with(current_path) {
        return Ok(());
    }

    // Part of the prefix that the children still have to account for.
    let wanted = &prefix[current_path.len()..];
    for child in &node.children {
        let edge = child.key_part.as_str();
        // Follow a child when its edge lies along the wanted text, or when the
        // wanted text ends somewhere inside the edge.
        let compatible = wanted.starts_with(edge) || edge.starts_with(wanted);
        if compatible {
            let child_path = [current_path, edge].concat();
            find_keys_with_prefix(tree, child.node_id, &child_path, prefix, result)?;
        }
    }

    Ok(())
}
/// Appends to `result` the key of every leaf in the subtree rooted at `node_id`.
///
/// `current_path` is the key text accumulated on the way from the root to
/// `node_id`; a node's own key is reported before those of its descendants.
fn collect_all_keys(
    tree: &mut RadixTree,
    node_id: u32,
    current_path: &str,
    result: &mut Vec<String>,
) -> Result<(), Error> {
    let node = tree.get_node(node_id)?;

    if node.is_leaf {
        result.push(current_path.to_owned());
    }

    for child in &node.children {
        let child_path = [current_path, child.key_part.as_str()].concat();
        collect_all_keys(tree, child.node_id, &child_path, result)?;
    }

    Ok(())
}
/// Gets all values for keys with a given prefix.
pub fn getall(tree: &mut RadixTree, prefix: &str) -> Result<Vec<Vec<u8>>, Error> {
// Get all matching keys
let keys = list(tree, prefix)?;
// Get values for each key
let mut values = Vec::new();
for key in keys {
if let Ok(value) = get(tree, &key) {
values.push(value);
}
}
Ok(values)
}
impl RadixTree {
/// Helper function to get a node from the database.
pub(crate) fn get_node(&mut self, node_id: u32) -> Result<Node, Error> {
let data = self.db.get(node_id)?;
Node::deserialize(&data)
}
/// Helper function to save a node to the database.
pub(crate) fn save_node(&mut self, node_id: Option<u32>, node: &Node) -> Result<u32, Error> {
let data = node.serialize();
let args = OurDBSetArgs {
id: node_id,
data: &data,
};
Ok(self.db.set(args)?)
}
/// Helper function to find all keys with a given prefix.
fn find_keys_with_prefix(
&mut self,
node_id: u32,
current_path: &str,
prefix: &str,
result: &mut Vec<String>,
) -> Result<(), Error> {
let node = self.get_node(node_id)?;
// If the current path already matches or exceeds the prefix length
if current_path.len() >= prefix.len() {
// Check if the current path starts with the prefix
if current_path.starts_with(prefix) {
// If this is a leaf node, add it to the results
if node.is_leaf {
result.push(current_path.to_string());
}
// Collect all keys from this subtree
for child in &node.children {
let child_path = format!("{}{}", current_path, child.key_part);
self.find_keys_with_prefix(child.node_id, &child_path, prefix, result)?;
}
}
return Ok(());
}
// Current path is shorter than the prefix, continue searching
for child in &node.children {
let child_path = format!("{}{}", current_path, child.key_part);
// Check if this child's path could potentially match the prefix
if prefix.starts_with(current_path) {
// The prefix starts with the current path, so we need to check if
// the child's key_part matches the next part of the prefix
let prefix_remainder = &prefix[current_path.len()..];
// If the prefix remainder starts with the child's key_part or vice versa
if prefix_remainder.starts_with(&child.key_part)
|| (child.key_part.starts_with(prefix_remainder)
&& child.key_part.len() >= prefix_remainder.len()) {
self.find_keys_with_prefix(child.node_id, &child_path, prefix, result)?;
}
}
}
Ok(())
}
/// Helper function to recursively collect all keys under a node.
fn collect_all_keys(
&mut self,
node_id: u32,
current_path: &str,
result: &mut Vec<String>,
) -> Result<(), Error> {
let node = self.get_node(node_id)?;
// If this node is a leaf, add its path to the result
if node.is_leaf {
result.push(current_path.to_string());
}
// Recursively collect keys from all children
for child in &node.children {
let child_path = format!("{}{}", current_path, child.key_part);
self.collect_all_keys(child.node_id, &child_path, result)?;
}
Ok(())
}
}

View File

@@ -0,0 +1,156 @@
//! Serialization and deserialization for RadixTree nodes.
use crate::error::Error;
use crate::node::{Node, NodeRef};
use std::io::{Cursor, Read};
use std::mem::size_of;
/// Current binary format version.
const VERSION: u8 = 1;
impl Node {
/// Serializes a node to bytes for storage.
pub fn serialize(&self) -> Vec<u8> {
let mut buffer = Vec::new();
// Add version byte
buffer.push(VERSION);
// Add key segment
write_string(&mut buffer, &self.key_segment);
// Add value as []u8
write_u16(&mut buffer, self.value.len() as u16);
buffer.extend_from_slice(&self.value);
// Add children
write_u16(&mut buffer, self.children.len() as u16);
for child in &self.children {
write_string(&mut buffer, &child.key_part);
write_u32(&mut buffer, child.node_id);
}
// Add leaf flag
buffer.push(if self.is_leaf { 1 } else { 0 });
buffer
}
/// Deserializes bytes to a node.
pub fn deserialize(data: &[u8]) -> Result<Self, Error> {
if data.is_empty() {
return Err(Error::Deserialization("Empty data".to_string()));
}
let mut cursor = Cursor::new(data);
// Read and verify version
let mut version_byte = [0u8; 1];
cursor.read_exact(&mut version_byte)
.map_err(|e| Error::Deserialization(format!("Failed to read version byte: {}", e)))?;
if version_byte[0] != VERSION {
return Err(Error::Deserialization(
format!("Invalid version byte: expected {}, got {}", VERSION, version_byte[0])
));
}
// Read key segment
let key_segment = read_string(&mut cursor)
.map_err(|e| Error::Deserialization(format!("Failed to read key segment: {}", e)))?;
// Read value as []u8
let value_len = read_u16(&mut cursor)
.map_err(|e| Error::Deserialization(format!("Failed to read value length: {}", e)))?;
let mut value = vec![0u8; value_len as usize];
cursor.read_exact(&mut value)
.map_err(|e| Error::Deserialization(format!("Failed to read value: {}", e)))?;
// Read children
let children_len = read_u16(&mut cursor)
.map_err(|e| Error::Deserialization(format!("Failed to read children length: {}", e)))?;
let mut children = Vec::with_capacity(children_len as usize);
for _ in 0..children_len {
let key_part = read_string(&mut cursor)
.map_err(|e| Error::Deserialization(format!("Failed to read child key part: {}", e)))?;
let node_id = read_u32(&mut cursor)
.map_err(|e| Error::Deserialization(format!("Failed to read child node ID: {}", e)))?;
children.push(NodeRef {
key_part,
node_id,
});
}
// Read leaf flag
let mut is_leaf_byte = [0u8; 1];
cursor.read_exact(&mut is_leaf_byte)
.map_err(|e| Error::Deserialization(format!("Failed to read leaf flag: {}", e)))?;
let is_leaf = is_leaf_byte[0] == 1;
Ok(Node {
key_segment,
value,
children,
is_leaf,
})
}
}
// Encoding primitives used by `Node::serialize`; all integers are little-endian.

/// Appends `s` as a `u16` byte count followed by its UTF-8 bytes.
///
/// The count is narrowed with `as u16`, so it is only exact for strings of at
/// most 65535 bytes.
fn write_string(buffer: &mut Vec<u8>, s: &str) {
    write_u16(buffer, s.len() as u16);
    buffer.extend(s.bytes());
}

/// Appends `value` as two little-endian bytes.
fn write_u16(buffer: &mut Vec<u8>, value: u16) {
    let [low, high] = value.to_le_bytes();
    buffer.push(low);
    buffer.push(high);
}

/// Appends `value` as four little-endian bytes.
fn write_u32(buffer: &mut Vec<u8>, value: u32) {
    let encoded = value.to_le_bytes();
    buffer.extend(encoded.iter());
}
// Decoding primitives used by `Node::deserialize`; all integers are little-endian.

/// Reads a `u16` byte count followed by that many bytes and decodes them as UTF-8.
///
/// Fails when the input ends early, or with `ErrorKind::InvalidData` when the
/// bytes are not valid UTF-8.
fn read_string(cursor: &mut Cursor<&[u8]>) -> std::io::Result<String> {
    let byte_count = usize::from(read_u16(cursor)?);
    let mut raw = vec![0u8; byte_count];
    cursor.read_exact(&mut raw)?;
    match String::from_utf8(raw) {
        Ok(text) => Ok(text),
        Err(invalid) => Err(std::io::Error::new(std::io::ErrorKind::InvalidData, invalid)),
    }
}

/// Reads two bytes and interprets them as a little-endian `u16`.
fn read_u16(cursor: &mut Cursor<&[u8]>) -> std::io::Result<u16> {
    let mut raw = [0u8; size_of::<u16>()];
    cursor.read_exact(&mut raw).map(|_| u16::from_le_bytes(raw))
}

/// Reads four bytes and interprets them as a little-endian `u32`.
fn read_u32(cursor: &mut Cursor<&[u8]>) -> std::io::Result<u32> {
    let mut raw = [0u8; size_of::<u32>()];
    cursor.read_exact(&mut raw).map(|_| u32::from_le_bytes(raw))
}
/// Returns the longest common prefix of `a` and `b`.
///
/// The comparison is done on bytes, but the result always ends on a character
/// boundary: if the two strings diverge in the middle of a multi-byte UTF-8
/// character, that character is not part of the prefix. (Slicing at the raw
/// byte position would panic in that case.)
pub fn get_common_prefix(a: &str, b: &str) -> String {
    let mut end = a
        .bytes()
        .zip(b.bytes())
        .take_while(|(x, y)| x == y)
        .count();

    // Step back to the start of the character in which the strings diverge.
    // Index 0 is always a boundary, so this terminates.
    while !a.is_char_boundary(end) {
        end -= 1;
    }

    a[..end].to_string()
}