...
This commit is contained in:
134
tst/src/serialize.rs
Normal file
134
tst/src/serialize.rs
Normal file
@@ -0,0 +1,134 @@
|
||||
//! Serialization and deserialization for TST nodes.
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::node::TSTNode;
|
||||
|
||||
/// Current binary format version.
|
||||
const VERSION: u8 = 1;
|
||||
|
||||
impl TSTNode {
|
||||
/// Serializes a node to bytes for storage.
|
||||
pub fn serialize(&self) -> Vec<u8> {
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
// Version
|
||||
buffer.push(VERSION);
|
||||
|
||||
// Character (as UTF-32)
|
||||
let char_bytes = (self.character as u32).to_le_bytes();
|
||||
buffer.extend_from_slice(&char_bytes);
|
||||
|
||||
// Is end of key
|
||||
buffer.push(if self.is_end_of_key { 1 } else { 0 });
|
||||
|
||||
// Value (only if is_end_of_key)
|
||||
if self.is_end_of_key {
|
||||
let value_len = (self.value.len() as u32).to_le_bytes();
|
||||
buffer.extend_from_slice(&value_len);
|
||||
buffer.extend_from_slice(&self.value);
|
||||
} else {
|
||||
// Zero length
|
||||
buffer.extend_from_slice(&[0, 0, 0, 0]);
|
||||
}
|
||||
|
||||
// Child pointers
|
||||
let left_id = self.left_id.unwrap_or(0).to_le_bytes();
|
||||
buffer.extend_from_slice(&left_id);
|
||||
|
||||
let middle_id = self.middle_id.unwrap_or(0).to_le_bytes();
|
||||
buffer.extend_from_slice(&middle_id);
|
||||
|
||||
let right_id = self.right_id.unwrap_or(0).to_le_bytes();
|
||||
buffer.extend_from_slice(&right_id);
|
||||
|
||||
buffer
|
||||
}
|
||||
|
||||
/// Deserializes bytes to a node.
|
||||
pub fn deserialize(data: &[u8]) -> Result<Self, Error> {
|
||||
if data.len() < 14 { // Minimum size: version + char + is_end + value_len + 3 child IDs
|
||||
return Err(Error::Deserialization("Data too short".to_string()));
|
||||
}
|
||||
|
||||
let mut pos = 0;
|
||||
|
||||
// Version
|
||||
let version = data[pos];
|
||||
pos += 1;
|
||||
|
||||
if version != VERSION {
|
||||
return Err(Error::Deserialization(format!("Unsupported version: {}", version)));
|
||||
}
|
||||
|
||||
// Character
|
||||
let char_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
|
||||
let char_code = u32::from_le_bytes(char_bytes);
|
||||
let character = char::from_u32(char_code)
|
||||
.ok_or_else(|| Error::Deserialization("Invalid character".to_string()))?;
|
||||
pos += 4;
|
||||
|
||||
// Is end of key
|
||||
let is_end_of_key = data[pos] != 0;
|
||||
pos += 1;
|
||||
|
||||
// Value length
|
||||
let value_len_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
|
||||
let value_len = u32::from_le_bytes(value_len_bytes) as usize;
|
||||
pos += 4;
|
||||
|
||||
// Value
|
||||
let value = if value_len > 0 {
|
||||
if pos + value_len > data.len() {
|
||||
return Err(Error::Deserialization("Value length exceeds data".to_string()));
|
||||
}
|
||||
data[pos..pos+value_len].to_vec()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
pos += value_len;
|
||||
|
||||
// Child pointers
|
||||
if pos + 12 > data.len() {
|
||||
return Err(Error::Deserialization("Data too short for child pointers".to_string()));
|
||||
}
|
||||
|
||||
let left_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
|
||||
let left_id = u32::from_le_bytes(left_id_bytes);
|
||||
pos += 4;
|
||||
|
||||
let middle_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
|
||||
let middle_id = u32::from_le_bytes(middle_id_bytes);
|
||||
pos += 4;
|
||||
|
||||
let right_id_bytes = [data[pos], data[pos+1], data[pos+2], data[pos+3]];
|
||||
let right_id = u32::from_le_bytes(right_id_bytes);
|
||||
|
||||
Ok(TSTNode {
|
||||
character,
|
||||
value,
|
||||
is_end_of_key,
|
||||
left_id: if left_id == 0 { None } else { Some(left_id) },
|
||||
middle_id: if middle_id == 0 { None } else { Some(middle_id) },
|
||||
right_id: if right_id == 0 { None } else { Some(right_id) },
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the common prefix of two strings.
///
/// The strings are compared one `char` (Unicode scalar value) at a time, so
/// the result never splits a multi-byte character. Returns an empty string
/// when the inputs differ at the first character or either one is empty.
pub fn get_common_prefix(a: &str, b: &str) -> String {
    // `zip` stops at the shorter input; `take_while` stops at the first mismatch.
    a.chars()
        .zip(b.chars())
        .take_while(|(x, y)| x == y)
        .map(|(x, _)| x)
        .collect()
}
|
Reference in New Issue
Block a user