prevent unauthorized access to administrative db0 when connection to redis-cli

This commit is contained in:
Maxime Van Hees
2025-10-07 10:52:30 +02:00
parent 2139deb85d
commit a8720c06db
5 changed files with 525 additions and 5 deletions

View File

@@ -47,18 +47,24 @@ HeroDB can be interacted with using any standard Redis client, such as `redis-cl
### Example with `redis-cli` ### Example with `redis-cli`
Connections start with no database selected. You must SELECT a database first.
- To work in the admin database (DB 0), authenticate with the admin secret:
```bash ```bash
redis-cli -p 6379 SELECT 0 KEY myadminsecret
redis-cli -p 6379 SET mykey "Hello from HeroDB!" redis-cli -p 6379 SET mykey "Hello from HeroDB!"
redis-cli -p 6379 GET mykey redis-cli -p 6379 GET mykey
# → "Hello from HeroDB!" # → "Hello from HeroDB!"
```
- To use a user database, first create one via the JSON-RPC API (see docs/rpc_examples.md), then select it:
```bash
# Suppose RPC created database id 1
redis-cli -p 6379 SELECT 1
redis-cli -p 6379 HSET user:1 name "Alice" age "30" redis-cli -p 6379 HSET user:1 name "Alice" age "30"
redis-cli -p 6379 HGET user:1 name redis-cli -p 6379 HGET user:1 name
# → "Alice" # → "Alice"
redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10 redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10
# → 1) "0"
# 2) 1) "user:1"
``` ```
## Cryptography ## Cryptography

View File

@@ -80,6 +80,7 @@ Keys in `DB 0` (internal layout, but useful to understand how things work):
- Requires the exact admin secret as the `KEY` argument to `SELECT 0` - Requires the exact admin secret as the `KEY` argument to `SELECT 0`
- Permission is `ReadWrite` when the secret matches - Permission is `ReadWrite` when the secret matches
Connections start with no database selected. Any command that requires storage (GET, SET, H*, L*, SCAN, etc.) will return an error until you issue a SELECT to choose a database. Admin DB 0 is never accessible without authenticating via SELECT 0 KEY <admin_secret>.
### How to select databases with optional `KEY` ### How to select databases with optional `KEY`
- Public DB (no key required) - Public DB (no key required)

View File

@@ -126,7 +126,9 @@ redis-cli -p 6381 --pipe < dump.rdb
## Authentication and Database Selection ## Authentication and Database Selection
HeroDB uses an `Admin DB 0` to govern database existence, access and per-db encryption. Access control is enforced via `Admin DB 0` metadata. See the full model in `docs/admin.md`. Connections start with no database selected. Any storage-backed command (GET, SET, H*, L*, SCAN, etc.) will return an error until you issue a SELECT to choose a database.
HeroDB uses an `Admin DB 0` to govern database existence, access and per-db encryption. Access control is enforced via `Admin DB 0` metadata. See the full model in (docs/admin.md:1).
Examples: Examples:
```bash ```bash
@@ -145,4 +147,10 @@ redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
# Admin DB 0 (requires admin secret) # Admin DB 0 (requires admin secret)
redis-cli -p $PORT SELECT 0 KEY my-admin-secret redis-cli -p $PORT SELECT 0 KEY my-admin-secret
# → OK # → OK
```
```bash
# Before selecting a DB, storage commands will fail
redis-cli -p $PORT GET key
# → -ERR No database selected. Use SELECT <id> [KEY <key>] first
``` ```

View File

@@ -21,6 +21,8 @@ use ureq::{Agent, AgentBuilder};
use std::time::Duration; use std::time::Duration;
use std::io::Read; use std::io::Read;
const NO_DB_SELECTED: u64 = u64::MAX;
#[derive(Clone)] #[derive(Clone)]
pub struct Server { pub struct Server {
pub db_cache: std::sync::Arc<std::sync::RwLock<HashMap<u64, Arc<dyn StorageBackend>>>>, pub db_cache: std::sync::Arc<std::sync::RwLock<HashMap<u64, Arc<dyn StorageBackend>>>>,
@@ -65,7 +67,7 @@ impl Server {
db_cache: Arc::new(std::sync::RwLock::new(HashMap::new())), db_cache: Arc::new(std::sync::RwLock::new(HashMap::new())),
option, option,
client_name: None, client_name: None,
selected_db: 0, selected_db: NO_DB_SELECTED,
queued_cmd: None, queued_cmd: None,
current_permissions: None, current_permissions: None,
@@ -103,6 +105,17 @@ impl Server {
} }
pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> { pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> {
// Require explicit SELECT before any storage access
if self.selected_db == NO_DB_SELECTED {
return Err(DBError("No database selected. Use SELECT <id> [KEY <key>] first".to_string()));
}
// Admin DB 0 access must be authenticated with SELECT 0 KEY <admin_secret>
if self.selected_db == 0 {
if !matches!(self.current_permissions, Some(crate::rpc::Permissions::ReadWrite)) {
return Err(DBError("Admin DB 0 requires SELECT 0 KEY <admin_secret>".to_string()));
}
}
let mut cache = self.db_cache.write().unwrap(); let mut cache = self.db_cache.write().unwrap();
if let Some(storage) = cache.get(&self.selected_db) { if let Some(storage) = cache.get(&self.selected_db) {
@@ -328,6 +341,10 @@ impl Server {
/// Check if current permissions allow read operations /// Check if current permissions allow read operations
pub fn has_read_permission(&self) -> bool { pub fn has_read_permission(&self) -> bool {
// No DB selected -> no permissions
if self.selected_db == NO_DB_SELECTED {
return false;
}
// If an explicit permission is set for this connection, honor it. // If an explicit permission is set for this connection, honor it.
if let Some(perms) = self.current_permissions.as_ref() { if let Some(perms) = self.current_permissions.as_ref() {
return matches!(*perms, crate::rpc::Permissions::Read | crate::rpc::Permissions::ReadWrite); return matches!(*perms, crate::rpc::Permissions::Read | crate::rpc::Permissions::ReadWrite);
@@ -347,6 +364,10 @@ impl Server {
/// Check if current permissions allow write operations /// Check if current permissions allow write operations
pub fn has_write_permission(&self) -> bool { pub fn has_write_permission(&self) -> bool {
// No DB selected -> no permissions
if self.selected_db == NO_DB_SELECTED {
return false;
}
// If an explicit permission is set for this connection, honor it. // If an explicit permission is set for this connection, honor it.
if let Some(perms) = self.current_permissions.as_ref() { if let Some(perms) = self.current_permissions.as_ref() {
return matches!(*perms, crate::rpc::Permissions::ReadWrite); return matches!(*perms, crate::rpc::Permissions::ReadWrite);

View File

@@ -0,0 +1,484 @@
use redis::{Client, Connection, RedisResult, Value};
use std::process::{Child, Command};
use std::time::Duration;
use jsonrpsee::http_client::{HttpClient, HttpClientBuilder};
use herodb::rpc::{BackendType, DatabaseConfig, RpcClient};
use base64::Engine;
use tokio::time::sleep;
// ------------------------
// Helpers
// ------------------------
fn get_redis_connection(port: u16) -> Connection {
let connection_info = format!("redis://127.0.0.1:{}", port);
let client = Client::open(connection_info).unwrap();
let mut attempts = 0;
loop {
match client.get_connection() {
Ok(mut conn) => {
if redis::cmd("PING").query::<String>(&mut conn).is_ok() {
return conn;
}
}
Err(e) => {
if attempts >= 3600 {
panic!("Failed to connect to Redis server after 3600 attempts: {}", e);
}
}
}
attempts += 1;
std::thread::sleep(Duration::from_millis(500));
}
}
async fn get_rpc_client(port: u16) -> HttpClient {
let url = format!("http://127.0.0.1:{}", port + 1); // RPC port = Redis port + 1
HttpClientBuilder::default().build(url).unwrap()
}
/// Wait until RPC server is responsive (getServerStats succeeds) or panic after retries.
async fn wait_for_rpc_ready(client: &HttpClient, max_attempts: u32, delay: Duration) {
for _ in 0..max_attempts {
match client.get_server_stats().await {
Ok(_) => return,
Err(_) => {
sleep(delay).await;
}
}
}
panic!("RPC server did not become ready in time");
}
// A guard to ensure the server process is killed when it goes out of scope and test dir cleaned.
struct ServerProcessGuard {
process: Child,
test_dir: String,
}
impl Drop for ServerProcessGuard {
fn drop(&mut self) {
eprintln!("Killing server process (pid: {})...", self.process.id());
if let Err(e) = self.process.kill() {
eprintln!("Failed to kill server process: {}", e);
}
match self.process.wait() {
Ok(status) => eprintln!("Server process exited with: {}", status),
Err(e) => eprintln!("Failed to wait on server process: {}", e),
}
// Clean up the specific test directory
eprintln!("Cleaning up test directory: {}", self.test_dir);
if let Err(e) = std::fs::remove_dir_all(&self.test_dir) {
eprintln!("Failed to clean up test directory: {}", e);
}
}
}
// Helper to set up the server and return guard + ports
async fn setup_server() -> (ServerProcessGuard, u16) {
use std::sync::atomic::{AtomicU16, Ordering};
static PORT_COUNTER: AtomicU16 = AtomicU16::new(17500);
let port = PORT_COUNTER.fetch_add(1, Ordering::SeqCst);
let test_dir = format!("/tmp/herodb_lance_test_{}", port);
// Clean up previous test data
if std::path::Path::new(&test_dir).exists() {
let _ = std::fs::remove_dir_all(&test_dir);
}
std::fs::create_dir_all(&test_dir).unwrap();
// Start the server in a subprocess with RPC enabled (follows tantivy test pattern)
let child = Command::new("cargo")
.args(&[
"run",
"--",
"--dir",
&test_dir,
"--port",
&port.to_string(),
"--rpc-port",
&(port + 1).to_string(),
"--enable-rpc",
"--debug",
"--admin-secret",
"test-admin",
])
.spawn()
.expect("Failed to start server process");
let guard = ServerProcessGuard {
process: child,
test_dir,
};
// Give the server time to build and start (cargo run may compile first)
// Increase significantly to accommodate first-time dependency compilation in CI.
std::thread::sleep(Duration::from_millis(60000));
(guard, port)
}
// Convenient helpers for assertions on redis::Value
fn value_is_ok(v: &Value) -> bool {
match v {
Value::Okay => true,
Value::Status(s) if s == "OK" => true,
Value::Data(d) if d == b"OK" => true,
_ => false,
}
}
fn value_is_int_eq(v: &Value, expected: i64) -> bool {
matches!(v, Value::Int(n) if *n == expected)
}
fn value_is_str_eq(v: &Value, expected: &str) -> bool {
match v {
Value::Status(s) => s == expected,
Value::Data(d) => String::from_utf8_lossy(d) == expected,
_ => false,
}
}
fn to_string_lossy(v: &Value) -> String {
match v {
Value::Nil => "Nil".to_string(),
Value::Int(n) => n.to_string(),
Value::Status(s) => s.clone(),
Value::Okay => "OK".to_string(),
Value::Data(d) => String::from_utf8_lossy(d).to_string(),
Value::Bulk(items) => {
let inner: Vec<String> = items.iter().map(to_string_lossy).collect();
format!("[{}]", inner.join(", "))
}
}
}
// Extract ids from LANCE.SEARCH / LANCE.SEARCHIMAGE reply which is:
// Array of elements: [ [id, score, [k,v,...]], [id, score, ...], ... ]
fn extract_hit_ids(v: &Value) -> Vec<String> {
let mut ids = Vec::new();
if let Value::Bulk(items) = v {
for item in items {
if let Value::Bulk(row) = item {
if !row.is_empty() {
// first element is id (Data or Status)
let id = match &row[0] {
Value::Data(d) => String::from_utf8_lossy(d).to_string(),
Value::Status(s) => s.clone(),
Value::Int(n) => n.to_string(),
_ => continue,
};
ids.push(id);
}
}
}
}
ids
}
// Check whether a Bulk array (RESP array) contains a given string element.
fn bulk_contains_string(v: &Value, needle: &str) -> bool {
match v {
Value::Bulk(items) => items.iter().any(|it| match it {
Value::Data(d) => String::from_utf8_lossy(d).contains(needle),
Value::Status(s) => s.contains(needle),
Value::Bulk(_) => bulk_contains_string(it, needle),
_ => false,
}),
_ => false,
}
}
// ------------------------
// Test: Lance end-to-end (RESP) using only local embedders
// ------------------------
#[tokio::test]
async fn test_lance_end_to_end() {
let (_guard, port) = setup_server().await;
// First, wait for RESP to be available; this also gives cargo-run child ample time to finish building.
// Reuse the helper that retries PING until success.
{
let _conn_ready = get_redis_connection(port);
// Drop immediately; we only needed readiness.
}
// Build RPC client and create a Lance DB
let rpc_client = get_rpc_client(port).await;
// Ensure RPC server is listening before we issue createDatabase (allow longer warm-up to accommodate first-build costs)
wait_for_rpc_ready(&rpc_client, 3600, Duration::from_millis(250)).await;
let db_config = DatabaseConfig {
name: Some("media-db".to_string()),
storage_path: None,
max_size: None,
redis_version: None,
};
let db_id = rpc_client
.create_database(BackendType::Lance, db_config, None)
.await
.expect("create_database Lance failed");
assert_eq!(db_id, 1, "Expected first Lance DB id to be 1");
// Add access keys
let _ = rpc_client
.add_access_key(db_id, "readwrite_key".to_string(), "readwrite".to_string())
.await
.expect("add_access_key readwrite failed");
let _ = rpc_client
.add_access_key(db_id, "read_key".to_string(), "read".to_string())
.await
.expect("add_access_key read failed");
// Connect to Redis and SELECT DB with readwrite key
let mut conn = get_redis_connection(port);
let sel_ok: RedisResult<String> = redis::cmd("SELECT")
.arg(db_id)
.arg("KEY")
.arg("readwrite_key")
.query(&mut conn);
assert!(sel_ok.is_ok(), "SELECT db with key failed: {:?}", sel_ok);
assert_eq!(sel_ok.unwrap(), "OK");
// 1) Configure embedding providers: textset -> testhash dim 64, imageset -> testimagehash dim 512
let v = redis::cmd("LANCE.EMBEDDING")
.arg("CONFIG")
.arg("SET")
.arg("textset")
.arg("PROVIDER")
.arg("testhash")
.arg("MODEL")
.arg("any")
.arg("PARAM")
.arg("dim")
.arg("64")
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "Embedding config set (text) not OK: {}", to_string_lossy(&v));
let v = redis::cmd("LANCE.EMBEDDING")
.arg("CONFIG")
.arg("SET")
.arg("imageset")
.arg("PROVIDER")
.arg("testimagehash")
.arg("MODEL")
.arg("any")
.arg("PARAM")
.arg("dim")
.arg("512")
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "Embedding config set (image) not OK: {}", to_string_lossy(&v));
// 2) Create datasets
let v = redis::cmd("LANCE.CREATE")
.arg("textset")
.arg("DIM")
.arg(64)
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "LANCE.CREATE textset failed: {}", to_string_lossy(&v));
let v = redis::cmd("LANCE.CREATE")
.arg("imageset")
.arg("DIM")
.arg(512)
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "LANCE.CREATE imageset failed: {}", to_string_lossy(&v));
// 3) Store two text documents
let v = redis::cmd("LANCE.STORE")
.arg("textset")
.arg("ID")
.arg("doc-1")
.arg("TEXT")
.arg("The quick brown fox jumps over the lazy dog")
.arg("META")
.arg("title")
.arg("Fox")
.arg("category")
.arg("animal")
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "LANCE.STORE doc-1 failed: {}", to_string_lossy(&v));
let v = redis::cmd("LANCE.STORE")
.arg("textset")
.arg("ID")
.arg("doc-2")
.arg("TEXT")
.arg("A fast auburn fox vaulted a sleepy canine")
.arg("META")
.arg("title")
.arg("Paraphrase")
.arg("category")
.arg("animal")
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "LANCE.STORE doc-2 failed: {}", to_string_lossy(&v));
// 4) Store two images via BYTES (local fake bytes; embedder only hashes bytes, not decoding)
let img1: Vec<u8> = b"local-image-bytes-1-abcdefghijklmnopqrstuvwxyz".to_vec();
let img2: Vec<u8> = b"local-image-bytes-2-ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_vec();
let img1_b64 = base64::engine::general_purpose::STANDARD.encode(&img1);
let img2_b64 = base64::engine::general_purpose::STANDARD.encode(&img2);
let v = redis::cmd("LANCE.STOREIMAGE")
.arg("imageset")
.arg("ID")
.arg("img-1")
.arg("BYTES")
.arg(&img1_b64)
.arg("META")
.arg("title")
.arg("Local1")
.arg("group")
.arg("demo")
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "LANCE.STOREIMAGE img-1 failed: {}", to_string_lossy(&v));
let v = redis::cmd("LANCE.STOREIMAGE")
.arg("imageset")
.arg("ID")
.arg("img-2")
.arg("BYTES")
.arg(&img2_b64)
.arg("META")
.arg("title")
.arg("Local2")
.arg("group")
.arg("demo")
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "LANCE.STOREIMAGE img-2 failed: {}", to_string_lossy(&v));
// 5) Search text: K 2 QUERY "quick brown fox" RETURN 1 title
let v = redis::cmd("LANCE.SEARCH")
.arg("textset")
.arg("K")
.arg(2)
.arg("QUERY")
.arg("quick brown fox")
.arg("RETURN")
.arg(1)
.arg("title")
.query::<Value>(&mut conn)
.unwrap();
// Should be an array of hits
let ids = extract_hit_ids(&v);
assert!(
ids.contains(&"doc-1".to_string()) || ids.contains(&"doc-2".to_string()),
"LANCE.SEARCH should return doc-1/doc-2; got: {}",
to_string_lossy(&v)
);
// With FILTER on category
let v = redis::cmd("LANCE.SEARCH")
.arg("textset")
.arg("K")
.arg(2)
.arg("QUERY")
.arg("fox jumps")
.arg("FILTER")
.arg("category = 'animal'")
.arg("RETURN")
.arg(1)
.arg("title")
.query::<Value>(&mut conn)
.unwrap();
let ids_f = extract_hit_ids(&v);
assert!(
!ids_f.is_empty(),
"Filtered LANCE.SEARCH should return at least one document; got: {}",
to_string_lossy(&v)
);
// 6) Search images with QUERYBYTES
let query_img: Vec<u8> = b"local-image-query-3-1234567890".to_vec();
let query_img_b64 = base64::engine::general_purpose::STANDARD.encode(&query_img);
let v = redis::cmd("LANCE.SEARCHIMAGE")
.arg("imageset")
.arg("K")
.arg(2)
.arg("QUERYBYTES")
.arg(&query_img_b64)
.arg("RETURN")
.arg(1)
.arg("title")
.query::<Value>(&mut conn)
.unwrap();
// Should get 2 hits (img-1 and img-2) in some order; assert array non-empty
let img_ids = extract_hit_ids(&v);
assert!(
!img_ids.is_empty(),
"LANCE.SEARCHIMAGE should return non-empty results; got: {}",
to_string_lossy(&v)
);
// 7) Inspect datasets
let v = redis::cmd("LANCE.LIST").query::<Value>(&mut conn).unwrap();
assert!(
bulk_contains_string(&v, "textset"),
"LANCE.LIST missing textset: {}",
to_string_lossy(&v)
);
assert!(
bulk_contains_string(&v, "imageset"),
"LANCE.LIST missing imageset: {}",
to_string_lossy(&v)
);
// INFO textset
let info_text = redis::cmd("LANCE.INFO")
.arg("textset")
.query::<Value>(&mut conn)
.unwrap();
// INFO returns Array [k,v,k,v,...] including "dimension" "64" and "row_count" "...".
let info_str = to_string_lossy(&info_text);
assert!(
info_str.contains("dimension") && info_str.contains("64"),
"LANCE.INFO textset should include dimension 64; got: {}",
info_str
);
// 8) Delete by id and drop datasets
let v = redis::cmd("LANCE.DEL")
.arg("textset")
.arg("doc-2")
.query::<Value>(&mut conn)
.unwrap();
// Returns SimpleString "1" or Int 1 depending on encoding path; accept either
assert!(
value_is_int_eq(&v, 1) || value_is_str_eq(&v, "1"),
"LANCE.DEL doc-2 expected 1; got {}",
to_string_lossy(&v)
);
let v = redis::cmd("LANCE.DROP")
.arg("textset")
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "LANCE.DROP textset failed: {}", to_string_lossy(&v));
let v = redis::cmd("LANCE.DROP")
.arg("imageset")
.query::<Value>(&mut conn)
.unwrap();
assert!(value_is_ok(&v), "LANCE.DROP imageset failed: {}", to_string_lossy(&v));
}