use std::fs::{self, File, OpenOptions};
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::Path;

use crate::error::Error;
use crate::location::Location;

const DATA_FILE_NAME: &str = "data";
const INCREMENTAL_FILE_NAME: &str = ".inc";

/// Configuration for creating a new lookup table
pub struct LookupConfig {
    /// Size of the lookup table
    pub size: u32,
    /// Size of each entry in bytes (2-6)
    /// - 2: For databases with < 65,536 records (single file)
    /// - 3: For databases with < 16,777,216 records (single file)
    /// - 4: For databases with < 4,294,967,296 records (single file)
    /// - 6: For large databases requiring multiple files
    pub keysize: u8,
    /// Path for disk-based lookup
    pub lookuppath: String,
    /// Whether to use incremental mode
    pub incremental_mode: bool,
}

/// Lookup table maps keys to physical locations in the backend storage
pub struct LookupTable {
    /// Size of each entry in bytes (2-6)
    keysize: u8,
    /// Path for disk-based lookup
    lookuppath: String,
    /// In-memory data for memory-based lookup
    data: Vec<u8>,
    /// Next empty slot if incremental mode is enabled
    incremental: Option<u32>,
}

impl LookupTable {
    /// Returns the keysize of this lookup table
    pub fn keysize(&self) -> u8 {
        self.keysize
    }

    /// Creates a new lookup table with the given configuration
    pub fn new(config: LookupConfig) -> Result<Self, Error> {
        // Verify keysize is valid
        if ![2, 3, 4, 6].contains(&config.keysize) {
            return Err(Error::InvalidOperation(format!(
                "Invalid keysize: {}",
                config.keysize
            )));
        }

        let incremental = if config.incremental_mode {
            Some(get_incremental_info(&config)?)
        } else {
            None
        };

        if !config.lookuppath.is_empty() {
            // Create directory if it doesn't exist
            fs::create_dir_all(&config.lookuppath)?;

            // For disk-based lookup, create empty file if it doesn't exist
            let data_path = Path::new(&config.lookuppath).join(DATA_FILE_NAME);
            if !data_path.exists() {
                let data = vec![0u8; config.size as usize * config.keysize as usize];
                fs::write(&data_path, &data)?;
            }

            Ok(LookupTable {
                data: Vec::new(),
                keysize: config.keysize,
                lookuppath: config.lookuppath,
                incremental,
            })
        } else {
            // For memory-based lookup
            Ok(LookupTable {
                data: vec![0u8; config.size as usize * config.keysize as usize],
                keysize: config.keysize,
                lookuppath: String::new(),
                incremental,
            })
        }
    }

    /// Gets a location for the given ID
    pub fn get(&self, id: u32) -> Result<Location, Error> {
        let entry_size = self.keysize as usize;

        if !self.lookuppath.is_empty() {
            // Disk-based lookup
            let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);

            // Check file size first
            let file_size = fs::metadata(&data_path)?.len();
            let start_pos = id as u64 * entry_size as u64;
            if start_pos + entry_size as u64 > file_size {
                return Err(Error::LookupError(format!(
                    "Invalid read for get in lut: {}: {} would exceed file size {}",
                    self.lookuppath,
                    start_pos + entry_size as u64,
                    file_size
                )));
            }

            // Read directly from file
            let mut file = File::open(&data_path)?;
            file.seek(SeekFrom::Start(start_pos))?;
            let mut data = vec![0u8; entry_size];
            let bytes_read = file.read(&mut data)?;
            if bytes_read < entry_size {
                return Err(Error::LookupError(format!(
                    "Incomplete read: expected {} bytes but got {}",
                    entry_size, bytes_read
                )));
            }

            return Location::from_bytes(&data, self.keysize);
        }

        // Memory-based lookup
        if (id * self.keysize as u32) as usize >= self.data.len() {
            return Err(Error::LookupError("Index out of bounds".to_string()));
        }
        let start = (id * self.keysize as u32) as usize;
        let end = start + entry_size;
        Location::from_bytes(&self.data[start..end], self.keysize)
    }

    /// Sets a location for the given ID
    pub fn set(&mut self, id: u32, location: Location) -> Result<(), Error> {
        let entry_size = self.keysize as usize;

        // Handle incremental mode
        if let Some(incremental) = self.incremental {
            if id == incremental {
                self.increment_index()?;
            }
            if id > incremental {
                return Err(Error::InvalidOperation(
                    "Cannot set ID for insertions when incremental mode is enabled".to_string(),
                ));
            }
        }

        // Convert location to bytes based on keysize
        let location_bytes = match self.keysize {
            2 => {
                if location.file_nr != 0 {
                    return Err(Error::InvalidOperation(
                        "file_nr must be 0 for keysize=2".to_string(),
                    ));
                }
                if location.position > 0xFFFF {
                    return Err(Error::InvalidOperation(
                        "position exceeds max value for keysize=2 (max 65535)".to_string(),
                    ));
                }
                vec![(location.position >> 8) as u8, location.position as u8]
            }
            3 => {
                if location.file_nr != 0 {
                    return Err(Error::InvalidOperation(
                        "file_nr must be 0 for keysize=3".to_string(),
                    ));
                }
                if location.position > 0xFFFFFF {
                    return Err(Error::InvalidOperation(
                        "position exceeds max value for keysize=3 (max 16777215)".to_string(),
                    ));
                }
                vec![
                    (location.position >> 16) as u8,
                    (location.position >> 8) as u8,
                    location.position as u8,
                ]
            }
            4 => {
                if location.file_nr != 0 {
                    return Err(Error::InvalidOperation(
                        "file_nr must be 0 for keysize=4".to_string(),
                    ));
                }
                vec![
                    (location.position >> 24) as u8,
                    (location.position >> 16) as u8,
                    (location.position >> 8) as u8,
                    location.position as u8,
                ]
            }
            6 => {
                // Full location with file_nr and position
                location.to_bytes()
            }
            _ => {
                return Err(Error::InvalidOperation(format!(
                    "Invalid keysize: {}",
                    self.keysize
                )))
            }
        };

        if !self.lookuppath.is_empty() {
            // Disk-based lookup
            let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
            let mut file = OpenOptions::new().write(true).open(data_path)?;
            let start_pos = id as u64 * entry_size as u64;
            file.seek(SeekFrom::Start(start_pos))?;
            file.write_all(&location_bytes)?;
        } else {
            // Memory-based lookup
            let start = (id * self.keysize as u32) as usize;
            if start + entry_size > self.data.len() {
                return Err(Error::LookupError("Index out of bounds".to_string()));
            }
            for (i, &byte) in location_bytes.iter().enumerate() {
                self.data[start + i] = byte;
            }
        }

        Ok(())
    }

    /// Deletes an entry for the given ID
    pub fn delete(&mut self, id: u32) -> Result<(), Error> {
        // Set location to all zeros
        self.set(id, Location::default())
    }

    /// Gets the next available ID in incremental mode
    pub fn get_next_id(&self) -> Result<u32, Error> {
        let incremental = self.incremental.ok_or_else(|| {
            Error::InvalidOperation("Lookup table not in incremental mode".to_string())
        })?;

        let table_size = if !self.lookuppath.is_empty() {
            let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
            fs::metadata(data_path)?.len() as u32
        } else {
            self.data.len() as u32
        };

        if incremental * self.keysize as u32 >= table_size {
            return Err(Error::LookupError("Lookup table is full".to_string()));
        }

        Ok(incremental)
    }

    /// Increments the index in incremental mode
    pub fn increment_index(&mut self) -> Result<(), Error> {
        let mut incremental = self.incremental.ok_or_else(|| {
            Error::InvalidOperation("Lookup table not in incremental mode".to_string())
        })?;

        incremental += 1;
        self.incremental = Some(incremental);

        if !self.lookuppath.is_empty() {
            let inc_path = Path::new(&self.lookuppath).join(INCREMENTAL_FILE_NAME);
            fs::write(inc_path, incremental.to_string())?;
        }

        Ok(())
    }

    /// Exports the lookup table to a file
    pub fn export_data(&self, path: &str) -> Result<(), Error> {
        if !self.lookuppath.is_empty() {
            // For disk-based lookup, just copy the file
            let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
            fs::copy(data_path, path)?;
        } else {
            // For memory-based lookup, write the data to file
            fs::write(path, &self.data)?;
        }
        Ok(())
    }

    /// Imports the lookup table from a file
    pub fn import_data(&mut self, path: &str) -> Result<(), Error> {
        if !self.lookuppath.is_empty() {
            // For disk-based lookup, copy the file
            let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
            fs::copy(path, data_path)?;
        } else {
            // For memory-based lookup, read the data from file
            self.data = fs::read(path)?;
        }
        Ok(())
    }

    /// Exports only non-zero entries to save space
    pub fn export_sparse(&self, path: &str) -> Result<(), Error> {
        let mut output = Vec::new();
        let entry_size = self.keysize as usize;

        if !self.lookuppath.is_empty() {
            // For disk-based lookup
            let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
            let mut file = File::open(&data_path)?;
            let file_size = fs::metadata(&data_path)?.len();
            let max_entries = file_size / entry_size as u64;

            for id in 0..max_entries {
                file.seek(SeekFrom::Start(id * entry_size as u64))?;
                let mut buffer = vec![0u8; entry_size];
                let bytes_read = file.read(&mut buffer)?;
                if bytes_read < entry_size {
                    break;
                }

                // Check if entry is non-zero
                if buffer.iter().any(|&b| b != 0) {
                    // Write ID (4 bytes) + entry
                    output.extend_from_slice(&(id as u32).to_be_bytes());
                    output.extend_from_slice(&buffer);
                }
            }
        } else {
            // For memory-based lookup
            let max_entries = self.data.len() / entry_size;

            for id in 0..max_entries {
                let start = id * entry_size;
                let entry = &self.data[start..start + entry_size];

                // Check if entry is non-zero
                if entry.iter().any(|&b| b != 0) {
                    // Write ID (4 bytes) + entry
                    output.extend_from_slice(&(id as u32).to_be_bytes());
                    output.extend_from_slice(entry);
                }
            }
        }

        // Write the output to file
        fs::write(path, &output)?;
        Ok(())
    }

    /// Imports sparse data (only non-zero entries)
    pub fn import_sparse(&mut self, path: &str) -> Result<(), Error> {
        let data = fs::read(path)?;
        let entry_size = self.keysize as usize;
        let record_size = 4 + entry_size; // ID (4 bytes) + entry

        if data.len() % record_size != 0 {
            return Err(Error::DataCorruption(
                "Invalid sparse data format: size mismatch".to_string(),
            ));
        }

        for chunk_start in (0..data.len()).step_by(record_size) {
            if chunk_start + record_size > data.len() {
                break;
            }

            // Extract ID (4 bytes)
            let id_bytes = &data[chunk_start..chunk_start + 4];
            let id = u32::from_be_bytes([id_bytes[0], id_bytes[1], id_bytes[2], id_bytes[3]]);

            // Extract entry
            let entry = &data[chunk_start + 4..chunk_start + record_size];

            // Create location from entry
            let location = Location::from_bytes(entry, self.keysize)?;

            // Set the entry
            self.set(id, location)?;
        }

        Ok(())
    }

    /// Finds the highest ID with a non-zero entry
    pub fn find_last_entry(&mut self) -> Result<u32, Error> {
        let mut last_id = 0u32;
        let entry_size = self.keysize as usize;

        if !self.lookuppath.is_empty() {
            // For disk-based lookup
            let data_path = Path::new(&self.lookuppath).join(DATA_FILE_NAME);
            let mut file = File::open(&data_path)?;
            let file_size = fs::metadata(&data_path)?.len();

            let mut buffer = vec![0u8; entry_size];
            let mut pos = 0u32;

            while (pos as u64 * entry_size as u64) < file_size {
                file.seek(SeekFrom::Start(pos as u64 * entry_size as u64))?;
                let bytes_read = file.read(&mut buffer)?;
                if bytes_read == 0 || bytes_read < entry_size {
                    break;
                }

                let location = Location::from_bytes(&buffer, self.keysize)?;
                if location.position != 0 || location.file_nr != 0 {
                    last_id = pos;
                }
                pos += 1;
            }
        } else {
            // For memory-based lookup
            for i in 0..(self.data.len() / entry_size) as u32 {
                if let Ok(location) = self.get(i) {
                    if location.position != 0 || location.file_nr != 0 {
                        last_id = i;
                    }
                }
            }
        }

        Ok(last_id)
    }
}

/// Helper function to get the incremental value
fn get_incremental_info(config: &LookupConfig) -> Result<u32, Error> {
    if !config.incremental_mode {
        return Ok(0);
    }

    if !config.lookuppath.is_empty() {
        let inc_path = Path::new(&config.lookuppath).join(INCREMENTAL_FILE_NAME);

        if !inc_path.exists() {
            // Create a separate file for storing the incremental value
            fs::write(&inc_path, "1")?;
        }

        let inc_str = fs::read_to_string(&inc_path)?;
        let incremental = match inc_str.trim().parse::<u32>() {
            Ok(val) => val,
            Err(_) => {
                // If the value is invalid, reset it to 1
                fs::write(&inc_path, "1")?;
                1
            }
        };

        Ok(incremental)
    } else {
        // For memory-based lookup, start with 1
        Ok(1)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::env::temp_dir;
    use std::path::PathBuf;
    use std::time::{SystemTime, UNIX_EPOCH};

    fn get_temp_dir() -> PathBuf {
        let timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        temp_dir().join(format!("ourdb_lookup_test_{}", timestamp))
    }

    #[test]
    fn test_memory_lookup() {
        let config = LookupConfig {
            size: 1000,
            keysize: 4,
            lookuppath: String::new(),
            incremental_mode: true,
        };

        let mut lookup = LookupTable::new(config).unwrap();

        // Test set and get
        let location = Location {
            file_nr: 0,
            position: 12345,
        };

        lookup.set(1, location).unwrap();
        let retrieved = lookup.get(1).unwrap();

        assert_eq!(retrieved.file_nr, location.file_nr);
        assert_eq!(retrieved.position, location.position);

        // Test incremental mode
        let next_id = lookup.get_next_id().unwrap();
        assert_eq!(next_id, 2);

        lookup.increment_index().unwrap();
        let next_id = lookup.get_next_id().unwrap();
        assert_eq!(next_id, 3);
    }

    #[test]
    fn test_disk_lookup() {
        let temp_dir = get_temp_dir();
        fs::create_dir_all(&temp_dir).unwrap();

        let config = LookupConfig {
            size: 1000,
            keysize: 4,
            lookuppath: temp_dir.to_string_lossy().to_string(),
            incremental_mode: true,
        };

        let mut lookup = LookupTable::new(config).unwrap();

        // Test set and get
        let location = Location {
            file_nr: 0,
            position: 12345,
        };

        lookup.set(1, location).unwrap();
        let retrieved = lookup.get(1).unwrap();

        assert_eq!(retrieved.file_nr, location.file_nr);
        assert_eq!(retrieved.position, location.position);

        // Clean up
        fs::remove_dir_all(temp_dir).unwrap();
    }
}
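
// The tests below are an illustrative sketch added for clarity and are not part of the
// original suite. They exercise behaviour implied by the code above: the keysize=2
// position limit enforced in `set`, and the sparse export/import round-trip. The
// temporary file name used here is an arbitrary assumption; `Location` is assumed to
// expose public `file_nr`/`position` fields, as in `mod tests`.
#[cfg(test)]
mod sketch_tests {
    use super::*;

    #[test]
    fn test_keysize_2_position_limit() {
        let config = LookupConfig {
            size: 10,
            keysize: 2,
            lookuppath: String::new(),
            incremental_mode: false,
        };
        let mut lookup = LookupTable::new(config).unwrap();

        // Positions above 0xFFFF cannot be encoded in two bytes and must be rejected.
        let too_large = Location {
            file_nr: 0,
            position: 0x1_0000,
        };
        assert!(lookup.set(1, too_large).is_err());

        // The largest encodable position round-trips through a two-byte entry.
        let max = Location {
            file_nr: 0,
            position: 0xFFFF,
        };
        lookup.set(1, max).unwrap();
        assert_eq!(lookup.get(1).unwrap().position, 0xFFFF);
    }

    #[test]
    fn test_sparse_export_import_round_trip() {
        let make_table = || {
            LookupTable::new(LookupConfig {
                size: 100,
                keysize: 4,
                lookuppath: String::new(),
                incremental_mode: false,
            })
            .unwrap()
        };

        let mut original = make_table();
        original
            .set(
                7,
                Location {
                    file_nr: 0,
                    position: 42,
                },
            )
            .unwrap();

        // Export only the single non-zero entry, then restore it into a fresh table.
        let path = std::env::temp_dir().join("ourdb_lookup_sparse_sketch");
        let path_str = path.to_string_lossy().to_string();
        original.export_sparse(&path_str).unwrap();

        let mut restored = make_table();
        restored.import_sparse(&path_str).unwrap();
        assert_eq!(restored.get(7).unwrap().position, 42);

        let _ = fs::remove_file(path);
    }
}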