WIP2: implementing lancedb: created embedding abstraction, server-side per-dataset embedding config + updates RPC endpoints
This commit is contained in:
@@ -14,6 +14,10 @@ use crate::protocol::Protocol;
|
||||
use crate::storage_trait::StorageBackend;
|
||||
use crate::admin_meta;
|
||||
|
||||
// Embeddings: config and cache
|
||||
use crate::embedding::{EmbeddingConfig, create_embedder, Embedder};
|
||||
use serde_json;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Server {
|
||||
pub db_cache: std::sync::Arc<std::sync::RwLock<HashMap<u64, Arc<dyn StorageBackend>>>>,
|
||||
@@ -29,6 +33,9 @@ pub struct Server {
|
||||
// Per-DB Lance stores (vector DB), keyed by db_id
|
||||
pub lance_stores: Arc<std::sync::RwLock<HashMap<u64, Arc<crate::lance_store::LanceStore>>>>,
|
||||
|
||||
// Per-(db_id, dataset) embedder cache
|
||||
pub embedders: Arc<std::sync::RwLock<HashMap<(u64, String), Arc<dyn Embedder>>>>,
|
||||
|
||||
// BLPOP waiter registry: per (db_index, key) FIFO of waiters
|
||||
pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>,
|
||||
pub waiter_seq: Arc<AtomicU64>,
|
||||
@@ -58,6 +65,7 @@ impl Server {
|
||||
|
||||
search_indexes: Arc::new(std::sync::RwLock::new(HashMap::new())),
|
||||
lance_stores: Arc::new(std::sync::RwLock::new(HashMap::new())),
|
||||
embedders: Arc::new(std::sync::RwLock::new(HashMap::new())),
|
||||
list_waiters: Arc::new(Mutex::new(HashMap::new())),
|
||||
waiter_seq: Arc::new(AtomicU64::new(1)),
|
||||
}
|
||||
@@ -153,6 +161,78 @@ impl Server {
|
||||
Ok(store)
|
||||
}
|
||||
|
||||
// ----- Embedding configuration and resolution -----
|
||||
|
||||
// Sidecar embedding config path: <base_dir>/lance/<db_id>/<dataset>.lance.embedding.json
|
||||
fn dataset_embedding_config_path(&self, dataset: &str) -> std::path::PathBuf {
|
||||
let mut base = self.lance_data_path();
|
||||
// Ensure parent dir exists
|
||||
if !base.exists() {
|
||||
let _ = std::fs::create_dir_all(&base);
|
||||
}
|
||||
base.push(format!("{}.lance.embedding.json", dataset));
|
||||
base
|
||||
}
|
||||
|
||||
/// Persist per-dataset embedding config as JSON sidecar.
|
||||
pub fn set_dataset_embedding_config(&self, dataset: &str, cfg: &EmbeddingConfig) -> Result<(), DBError> {
|
||||
if self.selected_db == 0 {
|
||||
return Err(DBError("Lance not available on admin DB 0".to_string()));
|
||||
}
|
||||
let p = self.dataset_embedding_config_path(dataset);
|
||||
let data = serde_json::to_vec_pretty(cfg)
|
||||
.map_err(|e| DBError(format!("Failed to serialize embedding config: {}", e)))?;
|
||||
std::fs::write(&p, data)
|
||||
.map_err(|e| DBError(format!("Failed to write embedding config {}: {}", p.display(), e)))?;
|
||||
// Invalidate embedder cache entry for this dataset
|
||||
{
|
||||
let mut map = self.embedders.write().unwrap();
|
||||
map.remove(&(self.selected_db, dataset.to_string()));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load per-dataset embedding config.
|
||||
pub fn get_dataset_embedding_config(&self, dataset: &str) -> Result<EmbeddingConfig, DBError> {
|
||||
if self.selected_db == 0 {
|
||||
return Err(DBError("Lance not available on admin DB 0".to_string()));
|
||||
}
|
||||
let p = self.dataset_embedding_config_path(dataset);
|
||||
if !p.exists() {
|
||||
return Err(DBError(format!(
|
||||
"Embedding config not set for dataset '{}'. Use LANCE.EMBEDDING CONFIG SET ... or RPC to configure.",
|
||||
dataset
|
||||
)));
|
||||
}
|
||||
let data = std::fs::read(&p)
|
||||
.map_err(|e| DBError(format!("Failed to read embedding config {}: {}", p.display(), e)))?;
|
||||
let cfg: EmbeddingConfig = serde_json::from_slice(&data)
|
||||
.map_err(|e| DBError(format!("Failed to parse embedding config {}: {}", p.display(), e)))?;
|
||||
Ok(cfg)
|
||||
}
|
||||
|
||||
/// Resolve or build an embedder for (db_id, dataset). Caches instance.
|
||||
pub fn get_embedder_for(&self, dataset: &str) -> Result<Arc<dyn Embedder>, DBError> {
|
||||
if self.selected_db == 0 {
|
||||
return Err(DBError("Lance not available on admin DB 0".to_string()));
|
||||
}
|
||||
// Fast path
|
||||
{
|
||||
let map = self.embedders.read().unwrap();
|
||||
if let Some(e) = map.get(&(self.selected_db, dataset.to_string())) {
|
||||
return Ok(e.clone());
|
||||
}
|
||||
}
|
||||
// Load config and instantiate
|
||||
let cfg = self.get_dataset_embedding_config(dataset)?;
|
||||
let emb = create_embedder(&cfg)?;
|
||||
{
|
||||
let mut map = self.embedders.write().unwrap();
|
||||
map.insert((self.selected_db, dataset.to_string()), emb.clone());
|
||||
}
|
||||
Ok(emb)
|
||||
}
|
||||
|
||||
/// Check if current permissions allow read operations
|
||||
pub fn has_read_permission(&self) -> bool {
|
||||
// If an explicit permission is set for this connection, honor it.
|
||||
|
Reference in New Issue
Block a user