Compare commits: vector ... tantivy_im

22 commits:
22ac4c9ed6, c470772a13, bd34fd092a, 8e044a64b7, 87177f4a07, 151a6ffbfa,
8ab841f68c, 8808c0e9d9, c6b277cc9c, 8331ed032b, b8ca73397d, 1b15806a85,
da325a9659, bdf363016a, 8798bc202e, 9fa9832605, 4bb24b38dd, f3da14b957,
5ea34b4445, d9a3b711d1, d931770e90, a87ec4dbb5
4749 Cargo.lock (generated)
File diff suppressed because it is too large.
19 Cargo.toml
@@ -1,8 +1,8 @@
 [package]
 name = "herodb"
 version = "0.0.1"
-authors = ["Pin Fang <fpfangpin@hotmail.com>"]
-edition = "2021"
+authors = ["ThreeFold Tech NV"]
+edition = "2024"

 [dependencies]
 anyhow = "1.0.59"
@@ -23,19 +23,10 @@ sha2 = "0.10"
 age = "0.10"
 secrecy = "0.8"
 ed25519-dalek = "2"
 x25519-dalek = "2"
 base64 = "0.22"
-# Lance vector database dependencies
-lance = "0.33"
-lance-index = "0.33"
-lance-linalg = "0.33"
-# Use Arrow version compatible with Lance 0.33
-arrow = "55.2"
-arrow-array = "55.2"
-arrow-schema = "55.2"
-parquet = "55.2"
-uuid = { version = "1.10", features = ["v4"] }
-reqwest = { version = "0.11", features = ["json"] }
-image = "0.25"
+jsonrpsee = { version = "0.26.0", features = ["http-client", "ws-client", "server", "macros"] }
+tantivy = "0.25.0"

 [dev-dependencies]
+redis = { version = "0.24", features = ["aio", "tokio-comp"] }
43 README.md
@@ -17,6 +17,8 @@ The main purpose of HeroDB is to offer a lightweight, embeddable, and Redis-comp
 - **Expiration**: Time-to-live (TTL) functionality for keys.
 - **Scanning**: Cursor-based iteration for keys and hash fields (`SCAN`, `HSCAN`).
 - **AGE Cryptography Commands**: HeroDB-specific extensions for cryptographic operations.
+- **Symmetric Encryption**: Stateless symmetric encryption using XChaCha20-Poly1305.
+- **Admin Database 0**: Centralized control for database management, access control, and per-database encryption.

 ## Quick Start

@@ -30,31 +32,14 @@ cargo build --release

 ### Running HeroDB

 You can start HeroDB with different backends and encryption options:

-#### Default `redb` Backend
-
-```bash
-./target/release/herodb --dir /tmp/herodb_redb --port 6379
-```
-
-#### `sled` Backend
-
-```bash
-./target/release/herodb --dir /tmp/herodb_sled --port 6379 --sled
-```
-
-#### `redb` with Encryption
-
-```bash
-./target/release/herodb --dir /tmp/herodb_encrypted --port 6379 --encrypt --key mysecretkey
-```
-
-#### `sled` with Encryption
-
-```bash
-./target/release/herodb --dir /tmp/herodb_sled_encrypted --port 6379 --sled --encrypt --key mysecretkey
-```
+Launch HeroDB with the required `--admin-secret` flag, which encrypts the admin database (DB 0) and authorizes admin access. Optional flags include `--dir` for the database directory, `--port` for the TCP port (default 6379), `--sled` for the sled backend, and `--enable-rpc` to start the JSON-RPC management server on port 8080.
+
+Example:
+```bash
+./target/release/herodb --dir /tmp/herodb --admin-secret myadminsecret --port 6379 --enable-rpc
+```
+
+For detailed launch options, see [Basics](docs/basics.md).

 ## Usage with Redis Clients

@@ -76,10 +61,24 @@ redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10
 # 2) 1) "user:1"
 ```

+## Cryptography
+
+HeroDB supports asymmetric encryption/signatures via AGE commands (X25519 for encryption, Ed25519 for signatures) in stateless or key-managed modes, and symmetric encryption via SYM commands. Keys are persisted in the admin database (DB 0) for managed modes.
+
+For details, see [AGE Cryptography](docs/age.md) and [Basics](docs/basics.md).
+
+## Database Management
+
+Databases are managed via the JSON-RPC API, with metadata stored in the encrypted admin database (DB 0). Databases are public by default upon creation; use RPC to set them private, requiring access keys for SELECT operations (read or readwrite based on permissions). This includes per-database encryption keys, access control, and lifecycle management.
+
+For examples, see [JSON-RPC Examples](docs/rpc_examples.md) and [Admin DB 0 Model](docs/admin.md).
+
 ## Documentation

 For more detailed information on commands, features, and advanced usage, please refer to the documentation:

 - [Basics](docs/basics.md)
 - [Supported Commands](docs/cmds.md)
 - [AGE Cryptography](docs/age.md)
+- [Admin DB 0 Model (access control, per-db encryption)](docs/admin.md)
+- [JSON-RPC Examples (management API)](docs/rpc_examples.md)
181 docs/admin.md (new file)
@@ -0,0 +1,181 @@

# Admin Database 0 (`0.db`)

This page explains what the Admin Database `DB 0` is, why HeroDB uses it, and how to work with it as a developer and end-user. It’s a practical guide covering how databases are created, listed, secured with access keys, and encrypted using per-database secrets.

## What is `DB 0`?

`DB 0` is the control plane for a HeroDB instance. It stores metadata for all user databases (`db_id >= 1`) so the server can:
- Know which databases exist (without scanning the filesystem)
- Enforce access control (public/private with access keys)
- Enforce per-database encryption (whether a given database must be opened encrypted and with which write-only key)

`DB 0` itself is always encrypted with the admin secret (the process-level secret provided at startup).

## How `DB 0` is created and secured

- `DB 0` lives at `<base_dir>/0.db`
- It is always encrypted using the admin secret provided at process startup (via the `--admin-secret <secret>` CLI flag)
- Only clients that provide the correct admin secret can `SELECT 0` (see “`SELECT` + `KEY`” below)

At startup, the server bootstraps `DB 0` (initializes counters and structures) if it’s missing.

## Metadata stored in `DB 0`

Keys in `DB 0` (internal layout, but useful to understand how things work):

- `admin:next_id`
  - String counter holding the next id to allocate (initialized to `"1"`)

- `admin:dbs`
  - A hash acting as a set of existing database ids
  - field = id (as string), value = `"1"`

- `meta:db:<id>`
  - A hash holding db-level metadata
  - field `public` = `"true"` or `"false"` (defaults to `true` if missing)

- `meta:db:<id>:keys`
  - A hash mapping access-key hashes to the string `Permission:created_at_seconds`
  - Examples: `Read:1713456789` or `ReadWrite:1713456789`
  - The plaintext access keys are never stored; only their `SHA-256` hashes are kept

- `meta:db:<id>:enc`
  - A string holding the per-database encryption key used to open `<id>.db` encrypted
  - This value is write-only from the perspective of the management APIs (it’s set at creation and never returned)

- `age:key:<name>`
  - Base64-encoded X25519 recipient (public encryption key) for named AGE keys
- `age:privkey:<name>`
  - Base64-encoded X25519 identity (secret encryption key) for named AGE keys
- `age:signpub:<name>`
  - Base64-encoded Ed25519 verify public key for named AGE keys
- `age:signpriv:<name>`
  - Base64-encoded Ed25519 signing secret key for named AGE keys

> You don’t need to manipulate these keys directly; they’re listed to clarify the model. AGE keys are managed via AGE commands.
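For illustration, a quick inspection session over RESP (a hypothetical sketch: it assumes a server on port 6379, the admin secret `my-admin-secret`, and an existing database 1; a single interactive session is used so the `SELECT` state persists across commands, and the replies shown simply follow the layout documented above):

```bash
$ redis-cli -p 6379
127.0.0.1:6379> SELECT 0 KEY my-admin-secret
OK
127.0.0.1:6379[0]> HGETALL admin:dbs
1) "1"
2) "1"
127.0.0.1:6379[0]> HGETALL meta:db:1
1) "public"
2) "true"
```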
## Database lifecycle

1) Create a database (via JSON-RPC)
- The server allocates an id from `admin:next_id`, registers it in `admin:dbs`, and defaults the database to `public=true`
- If you pass an optional `encryption_key` during creation, the server persists it in `meta:db:<id>:enc`. That database will be opened in encrypted mode from then on

2) Open and use a database
- Clients select a database over RESP using `SELECT`
- Authorization and encryption state are enforced using `DB 0` metadata

3) Delete database files
- Removing `<id>.db` removes the physical storage
- `DB 0` remains the source of truth for existence and may be updated by future management methods as the system evolves

## Access control model

- Public database (default)
  - Anyone can `SELECT <id>` with no key, and will get `ReadWrite` permission
- Private database
  - You must provide an access key when selecting the database
  - The server hashes the provided key with `SHA-256` and checks membership in `meta:db:<id>:keys`
  - Permissions are `Read` or `ReadWrite` depending on how the key was added
- Admin `DB 0`
  - Requires the exact admin secret as the `KEY` argument to `SELECT 0`
  - Permission is `ReadWrite` when the secret matches
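As a sketch of the matching step (assuming the stored hash is the hex-encoded SHA-256 of the plaintext key), you can compute a key's hash locally, e.g. to correlate it with `listAccessKeys` output or to pass to `deleteAccessKey`:

```bash
# Hex-encoded SHA-256 of a plaintext access key (assumed storage format)
echo -n "my-db2-access-key" | sha256sum | cut -d' ' -f1
```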
### How to select databases with optional `KEY`

- Public DB (no key required)
  - `SELECT <id>`

- Private DB (access key required)
  - `SELECT <id> KEY <plaintext_key>`

- Admin `DB 0` (admin secret required)
  - `SELECT 0 KEY <admin_secret>`

Examples (using `redis-cli`):
```bash
# Public database
redis-cli -p $PORT SELECT 1
# → OK

# Private database
redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
# → OK

# Admin DB 0
redis-cli -p $PORT SELECT 0 KEY my-admin-secret
# → OK
```

## Per-database encryption

- At database creation, you can provide an optional per-db encryption key
- If provided, the server persists that key in `DB 0` as `meta:db:<id>:enc`
- When you later open the database, the engine checks whether `meta:db:<id>:enc` exists to decide if it must open `<id>.db` in encrypted mode
- The per-db key is not returned by RPC; it is considered write-only configuration data

Operationally:
- Create with encryption: pass a non-null `encryption_key` to the `createDatabase` RPC
- Open later: simply `SELECT` the database; encryption is transparent to clients
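For example, a minimal creation call with a per-db key, sketched with `curl` (it assumes the RPC server is enabled on its default port 8080; the method name and params follow [JSON-RPC Examples](./rpc_examples.md)):

```bash
curl -s -X POST http://localhost:8080 \
  -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":1,"method":"hero_createDatabase","params":["Redb",{"name":null,"storage_path":null,"max_size":null,"redis_version":null},"my-per-db-encryption-key"]}'
```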
## Management via JSON-RPC

You can manage databases using the management RPC (namespaced `herodb.*`). Typical operations:
- `createDatabase(backend, config, encryption_key?)`
  - Allocates a new id, sets optional encryption key
- `listDatabases()`
  - Lists database ids and info (including whether storage is currently encrypted)
- `getDatabaseInfo(db_id)`
  - Returns details: backend, encrypted flag, size on disk, `key_count`, timestamps, etc.
- `addAccessKey(db_id, key, permissions)`
  - Adds a `Read` or `ReadWrite` access key (permissions = `"read"` | `"readwrite"`)
- `listAccessKeys(db_id)`
  - Returns hashes and permissions; you can use these hashes to delete keys
- `deleteAccessKey(db_id, key_hash)`
  - Removes a key by its hash
- `setDatabasePublic(db_id, public)`
  - Toggles public/private

Copyable JSON examples are provided in the [RPC examples documentation](./rpc_examples.md).

## Typical flows

1) Public, unencrypted database
- Create a new database without an encryption key
- Clients can immediately `SELECT <id>` without a key
- You can later make it private and add keys if needed

2) Private, encrypted database (see the sketch below)
- Create passing an `encryption_key`
- Mark it private (`setDatabasePublic false`) and add access keys
- Clients must use `SELECT <id> KEY <plaintext_access_key>`
- Storage opens in encrypted mode automatically
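Flow 2, sketched end-to-end with `curl` and `redis-cli` (hypothetical values: the RPC server is assumed on port 8080, and the new database is assumed to receive id 2):

```bash
# 1) Create the database with a per-db encryption key
curl -s -X POST http://localhost:8080 -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":1,"method":"hero_createDatabase","params":["Redb",{"name":null,"storage_path":null,"max_size":null,"redis_version":null},"my-per-db-encryption-key"]}'

# 2) Make it private and register an access key
curl -s -X POST http://localhost:8080 -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":2,"method":"hero_setDatabasePublic","params":[2,false]}'
curl -s -X POST http://localhost:8080 -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":3,"method":"hero_addAccessKey","params":[2,"my-db2-access-key","readwrite"]}'

# 3) Clients select with the plaintext access key; storage decrypts transparently
redis-cli -p 6379 SELECT 2 KEY my-db2-access-key
# → OK
```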
## Security notes

- Only `SHA-256` hashes of access keys are stored in `DB 0`; keep plaintext keys safe on the client side
- The per-db encryption key is never exposed via the API after it is set
- The admin secret must be kept secure; anyone with it can `SELECT 0` and perform administrative actions

## Troubleshooting

- `ERR invalid access key` when selecting a private db
  - Ensure you passed the `KEY` argument: `SELECT <id> KEY <plaintext_key>`
  - If you recently added the key, confirm the permissions and that you used the exact plaintext (the hash must match)

- `Database X not found`
  - The id isn’t registered in `DB 0` (`admin:dbs`). Use the management APIs to create or list databases

- Cannot `SELECT 0`
  - The `KEY` must be the exact admin secret passed at server startup

## Reference

- Admin metadata lives in `DB 0` (`0.db`) and controls:
  - Existence: `admin:dbs`
  - Access: the `public` field of `meta:db:<id>`, plus `meta:db:<id>:keys`
  - Encryption: `meta:db:<id>:enc`

For command examples and management payloads:
- RESP command basics: `docs/basics.md`
- Supported commands: `docs/cmds.md`
- JSON-RPC examples: `docs/rpc_examples.md`
242 docs/age.md
@@ -1,188 +1,96 @@
-# HeroDB AGE usage: Stateless vs Key‑Managed
+# HeroDB AGE Cryptography

-This document explains how to use the AGE cryptography commands exposed by HeroDB over the Redis protocol in two modes:
-- Stateless (ephemeral keys; nothing stored on the server)
-- Key‑managed (server‑persisted, named keys)
+HeroDB provides AGE-based asymmetric encryption and digital signatures over the Redis protocol using X25519 for encryption and Ed25519 for signatures. Keys can be used in stateless (ephemeral) or key-managed (persistent, named) modes.

-If you are new to the codebase, the exact tests that exercise these behaviors are:
-- [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495)
-- [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555)
+In key-managed mode, HeroDB uses a unified keypair concept: a single Ed25519 signing key is deterministically derived into X25519 keys for encryption, allowing one keypair to handle both encryption and signatures transparently.

-Implementation entry points:
-- [herodb/src/age.rs](herodb/src/age.rs)
-- Dispatch from [herodb/src/cmd.rs](herodb/src/cmd.rs)
+## Cryptographic Algorithms

-Note: Database-at-rest encryption flags in the test harness are unrelated to AGE commands; those flags control storage-level encryption of DB files. See the harness near [rust.start_test_server()](herodb/tests/usage_suite.rs:10).
+### X25519 (Encryption)
+- Elliptic-curve Diffie-Hellman key exchange for symmetric key derivation.
+- Used for encrypting/decrypting messages.

-## Quick start
+### Ed25519 (Signatures)
+- EdDSA digital signatures for message authentication.
+- Used for signing/verifying messages.

-Assuming the server is running on localhost on some $PORT:
+### Key Derivation
+Ed25519 signing keys are deterministically converted to X25519 keys for encryption. This enables a single keypair to support both operations without additional keys. Derivation uses the Ed25519 secret scalar clamped for X25519.
+
+In named keypairs, Ed25519 keys are stored, and X25519 keys are derived on-demand and cached.
+
+## Stateless Mode (Ephemeral Keys)
+No server-side storage; keys are provided with each command.
+
+Available commands:
+- `AGE GENENC`: Generate ephemeral X25519 keypair. Returns `[recipient, identity]`.
+- `AGE GENSIGN`: Generate ephemeral Ed25519 keypair. Returns `[verify_pub, sign_secret]`.
+- `AGE ENCRYPT <recipient> <message>`: Encrypt message. Returns base64 ciphertext.
+- `AGE DECRYPT <identity> <ciphertext_b64>`: Decrypt ciphertext. Returns plaintext.
+- `AGE SIGN <sign_secret> <message>`: Sign message. Returns base64 signature.
+- `AGE VERIFY <verify_pub> <message> <signature_b64>`: Verify signature. Returns 1 (valid) or 0 (invalid).
+
+Example:
+```bash
+redis-cli AGE GENENC
+# → 1) "age1qz..."            # recipient (X25519 public)
+#    2) "AGE-SECRET-KEY-1..." # identity (X25519 secret)
+
+redis-cli AGE ENCRYPT "age1qz..." "hello"
+# → base64_ciphertext
+
+redis-cli AGE DECRYPT "AGE-SECRET-KEY-1..." base64_ciphertext
+# → "hello"
+```

-```bash
-~/code/git.ourworld.tf/herocode/herodb/herodb/build.sh
-~/code/git.ourworld.tf/herocode/herodb/target/release/herodb --dir /tmp/data --debug --port 6381 --encryption-key 1234 --encrypt
-```
-
-```bash
-export PORT=6381
-# Generate an ephemeral keypair and encrypt/decrypt a message (stateless mode)
-redis-cli -p $PORT AGE GENENC
-# → returns an array: [recipient, identity]
-
-redis-cli -p $PORT AGE ENCRYPT <recipient> "hello world"
-# → returns ciphertext (base64 in a bulk string)
-
-redis-cli -p $PORT AGE DECRYPT <identity> <ciphertext_b64>
-# → returns "hello world"
-```
-
-For key‑managed mode, generate a named key once and reference it by name afterwards:
-
-```bash
-redis-cli -p $PORT AGE KEYGEN app1
-# → persists encryption keypair under name "app1"
-
-redis-cli -p $PORT AGE ENCRYPTNAME app1 "hello"
-redis-cli -p $PORT AGE DECRYPTNAME app1 <ciphertext_b64>
-```
-
-## Stateless AGE (ephemeral)
-
-Characteristics
-
-- No server‑side storage of keys.
-- You pass the actual key material with every call.
-- Not listable via AGE LIST.
-
-Commands and examples
-
-1) Ephemeral encryption keys
-
-```bash
-# Generate an ephemeral encryption keypair
-redis-cli -p $PORT AGE GENENC
-# Example output (abridged):
-# 1) "age1qz..."            # recipient (public key) = can be used by others e.g. to verify what I sign
-# 2) "AGE-SECRET-KEY-1..."  # identity (secret) = is like my private key, cannot lose this one
-
-# Encrypt with the recipient public key
-redis-cli -p $PORT AGE ENCRYPT "age1qz..." "hello world"
-# → returns bulk string payload: base64 ciphertext (encrypted content)
-
-# Decrypt with the identity (secret), in other words your private key
-redis-cli -p $PORT AGE DECRYPT "AGE-SECRET-KEY-1..." "<ciphertext_b64>"
-# → "hello world"
-```
-
-2) Ephemeral signing keys
-
-> ? is this same as my private key
-
-```bash
-# Generate an ephemeral signing keypair
-redis-cli -p $PORT AGE GENSIGN
-# Example output:
-# 1) "<verify_pub_b64>"
-# 2) "<sign_secret_b64>"
-
-# Sign a message with the secret
-redis-cli -p $PORT AGE SIGN "<sign_secret_b64>" "msg"
-# → returns "<signature_b64>"
-
-# Verify with the public key
-redis-cli -p $PORT AGE VERIFY "<verify_pub_b64>" "msg" "<signature_b64>"
-# → 1 (valid) or 0 (invalid)
-```
-
-When to use
-- You do not want the server to store private keys.
-- You already manage key material on the client side.
-- You need ad‑hoc operations without persistence.
-
-Reference test: [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495)
-
-## Key‑managed AGE (persistent, named)
-
-Characteristics
-- Server generates and persists keypairs under a chosen name.
-- Clients refer to keys by name; raw secrets are not supplied on each call.
-- Keys are discoverable via AGE LIST.
-
-Commands and examples
-
-1) Named encryption keys
-
-```bash
-# Create/persist a named encryption keypair
-redis-cli -p $PORT AGE KEYGEN app1
-# → returns [recipient, identity] but also stores them under name "app1"
-```
-
-> TODO: should not return identity (security, but there can be a separate function to export it, e.g. AGE EXPORTKEY app1)
-
-```bash
-# Encrypt using the stored public key
-redis-cli -p $PORT AGE ENCRYPTNAME app1 "hello"
-# → returns bulk string payload: base64 ciphertext
-
-# Decrypt using the stored secret
-redis-cli -p $PORT AGE DECRYPTNAME app1 "<ciphertext_b64>"
-# → "hello"
-```
-
-2) Named signing keys
+## Key-Managed Mode (Persistent Named Keys)
+Keys are stored server-side under names. Supports unified keypairs for both encryption and signatures.

-```bash
-# Create/persist a named signing keypair
-redis-cli -p $PORT AGE SIGNKEYGEN app1
-# → returns [verify_pub_b64, sign_secret_b64] and stores under name "app1"
-```
-
-> TODO: should not return sign_secret_b64 (for security, but there can be a separate function to export it, e.g. AGE EXPORTSIGNKEY app1)
-
-```bash
-# Sign using the stored secret
-redis-cli -p $PORT AGE SIGNNAME app1 "msg"
-# → returns "<signature_b64>"
-
-# Verify using the stored public key
-redis-cli -p $PORT AGE VERIFYNAME app1 "msg" "<signature_b64>"
-# → 1 (valid) or 0 (invalid)
-```
+Available commands:
+- `AGE KEYGEN <name>`: Generate and store unified keypair. Returns `[recipient, identity]` in age format.
+- `AGE SIGNKEYGEN <name>`: Generate and store Ed25519 signing keypair. Returns `[verify_pub, sign_secret]`.
+- `AGE ENCRYPTNAME <name> <message>`: Encrypt with named key. Returns base64 ciphertext.
+- `AGE DECRYPTNAME <name> <ciphertext_b64>`: Decrypt with named key. Returns plaintext.
+- `AGE SIGNNAME <name> <message>`: Sign with named key. Returns base64 signature.
+- `AGE VERIFYNAME <name> <message> <signature_b64>`: Verify with named key. Returns 1 or 0.
+- `AGE LIST`: List all stored key names. Returns sorted array of names.

-3) List stored AGE keys
+### AGE LIST Output
+Returns a flat, deduplicated, sorted array of key names (strings). Each name corresponds to a stored keypair, which may include encryption keys (X25519), signing keys (Ed25519), or both.
+
+Output format: `["name1", "name2", ...]`
+
+Example:
+```bash
+redis-cli AGE LIST
+# → 1) "<named_keypair_1>"
+#    2) "<named_keypair_2>"
+```

-```bash
-redis-cli -p $PORT AGE LIST
-# Example output includes labels such as "encpub" and your key names (e.g., "app1")
-```
+For unified keypairs (from `AGE KEYGEN`), the name handles both encryption (derived X25519) and signatures (stored Ed25519) transparently.
+
+Example with named keys:
+```bash
+redis-cli AGE KEYGEN app1
+# → 1) "age1..."              # recipient
+#    2) "AGE-SECRET-KEY-1..." # identity
+
+redis-cli AGE ENCRYPTNAME app1 "secret message"
+# → base64_ciphertext
+
+redis-cli AGE DECRYPTNAME app1 base64_ciphertext
+# → "secret message"
+
+redis-cli AGE SIGNNAME app1 "message"
+# → base64_signature
+
+redis-cli AGE VERIFYNAME app1 "message" base64_signature
+# → 1
+```

-When to use
-- You want centralized key storage/rotation and fewer secrets on the client.
-- You need names/labels for workflows and can trust the server with secrets.
-- You want discoverability (AGE LIST) and simpler client commands.
-
-Reference test: [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555)
-
-## Choosing a mode
-
-- Prefer Stateless when:
-  - Minimizing server trust for secret material is the priority.
-  - Clients already have a secure mechanism to store/distribute keys.
-- Prefer Key‑managed when:
-  - Centralized lifecycle, naming, and discoverability are beneficial.
-  - You plan to integrate rotation, ACLs, or auditability on the server side.
-
-## Security notes
-
-- Treat identities and signing secrets as sensitive; avoid logging them.
-- For key‑managed mode, ensure server storage (and backups) are protected.
-- AGE operations here are application‑level crypto and are distinct from database-at-rest encryption configured in the test harness.
-
-## Repository pointers
-
-- Stateless examples in tests: [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495)
-- Key‑managed examples in tests: [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555)
-- AGE implementation: [herodb/src/age.rs](herodb/src/age.rs)
-- Command dispatch: [herodb/src/cmd.rs](herodb/src/cmd.rs)
-- Bash demo: [herodb/examples/age_bash_demo.sh](herodb/examples/age_bash_demo.sh)
-- Rust persistent demo: [herodb/examples/age_persist_demo.rs](herodb/examples/age_persist_demo.rs)
-- Additional notes: [herodb/instructions/encrypt.md](herodb/instructions/encrypt.md)
+## Choosing a Mode
+- **Stateless**: For ad-hoc operations without persistence; client manages keys.
+- **Key-managed**: For centralized key lifecycle; server stores keys for convenience and discoverability.
+
+Implementation: [herodb/src/age.rs](herodb/src/age.rs) <br>
+Tests: [herodb/tests/usage_suite.rs](herodb/tests/usage_suite.rs)
103 docs/basics.md
@@ -1,4 +1,58 @@
-Here's an expanded version of the cmds.md documentation to include the list commands:
+# HeroDB Basics
+
+## Launching HeroDB
+
+To launch HeroDB, use the binary with required and optional flags. The `--admin-secret` flag is mandatory; it encrypts the admin database (DB 0) and authorizes admin access.
+
+### Launch Flags
+- `--dir <path>`: Directory for database files (default: current directory).
+- `--port <port>`: TCP port for the Redis protocol (default: 6379).
+- `--debug`: Enable debug logging.
+- `--sled`: Use the Sled backend (default: Redb).
+- `--enable-rpc`: Start the JSON-RPC management server on port 8080.
+- `--rpc-port <port>`: Custom RPC port (default: 8080).
+- `--admin-secret <secret>`: Required secret for DB 0 encryption and admin access.
+
+Example:
+```bash
+./target/release/herodb --dir /tmp/herodb --admin-secret mysecret --port 6379 --enable-rpc
+```
+
+Deprecated flags (`--encrypt`, `--encryption-key`) are ignored for data DBs; per-database encryption is managed via RPC.
+
+## Admin Database (DB 0)
+
+DB 0 acts as the administrative database instance, storing metadata for all user databases (IDs >= 1). It controls existence, access control, and per-database encryption. DB 0 is always encrypted with the `--admin-secret`.
+
+When creating a new database, DB 0 allocates an ID, registers it, and optionally stores a per-database encryption key (write-only). Databases are public by default; use RPC to set them private, requiring access keys for SELECT (read or readwrite based on permissions). Keys are persisted in DB 0 for managed AGE operations.
+
+Access DB 0 with `SELECT 0 KEY <admin-secret>`.
+
+## Symmetric Encryption
+
+HeroDB supports stateless symmetric encryption via SYM commands, using XChaCha20-Poly1305 AEAD.
+
+Commands:
+- `SYM KEYGEN`: Generate a 32-byte key. Returns base64-encoded key.
+- `SYM ENCRYPT <key_b64> <message>`: Encrypt message. Returns base64 ciphertext.
+- `SYM DECRYPT <key_b64> <ciphertext_b64>`: Decrypt. Returns plaintext.
+
+Example:
+```bash
+redis-cli SYM KEYGEN
+# → base64_key
+
+redis-cli SYM ENCRYPT base64_key "secret"
+# → base64_ciphertext
+
+redis-cli SYM DECRYPT base64_key base64_ciphertext
+# → "secret"
+```
+
+## RPC Options
+
+Enable the JSON-RPC server with `--enable-rpc` for database management. Methods include creating databases, managing access keys, and setting encryption. See [JSON-RPC Examples](./rpc_examples.md) for payloads.
+
 # HeroDB Commands

 HeroDB implements a subset of Redis commands over the Redis protocol. This document describes the available commands and their usage.
@@ -575,6 +629,29 @@ redis-cli -p $PORT AGE LIST
 # 2) "keyname2"
 ```

+## SYM Commands
+
+### SYM KEYGEN
+Generate a symmetric encryption key.
+```bash
+redis-cli -p $PORT SYM KEYGEN
+# → base64_encoded_32byte_key
+```
+
+### SYM ENCRYPT
+Encrypt a message with a symmetric key.
+```bash
+redis-cli -p $PORT SYM ENCRYPT <key_b64> "message"
+# → base64_encoded_ciphertext
+```
+
+### SYM DECRYPT
+Decrypt a ciphertext with a symmetric key.
+```bash
+redis-cli -p $PORT SYM DECRYPT <key_b64> <ciphertext_b64>
+# → decrypted_message
+```
+
 ## Server Information Commands

 ### INFO
@@ -621,3 +698,27 @@ This expanded documentation includes all the list commands that were implemented
 10. LINDEX - get element by index
 11. LRANGE - get range of elements

+
+## Updated Database Selection and Access Keys
+
+HeroDB uses an `Admin DB 0` to control database existence, access, and encryption. Access to data DBs can be public (no key) or private (requires a key). See the detailed model in `docs/admin.md`.
+
+Examples:
+
+```bash
+# Public database (no key required)
+redis-cli -p $PORT SELECT 1
+# → OK
+```
+
+```bash
+# Private database (requires access key)
+redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
+# → OK
+```
+
+```bash
+# Admin DB 0 (requires admin secret)
+redis-cli -p $PORT SELECT 0 KEY my-admin-secret
+# → OK
+```
23 docs/cmds.md
@@ -122,4 +122,27 @@ redis-cli -p 6379 --rdb dump.rdb

 # Import to sled
 redis-cli -p 6381 --pipe < dump.rdb
 ```
+
+## Authentication and Database Selection
+
+HeroDB uses an `Admin DB 0` to govern database existence, access, and per-db encryption. Access control is enforced via `Admin DB 0` metadata. See the full model in `docs/admin.md`.
+
+Examples:
+```bash
+# Public database (no key required)
+redis-cli -p $PORT SELECT 1
+# → OK
+```
+
+```bash
+# Private database (requires access key)
+redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
+# → OK
+```
+
+```bash
+# Admin DB 0 (requires admin secret)
+redis-cli -p $PORT SELECT 0 KEY my-admin-secret
+# → OK
+```
@@ -1,454 +0,0 @@
# Lance Vector Database Operations

HeroDB includes a powerful vector database integration using Lance, enabling high-performance vector storage, search, and multimodal data management. By default, it uses Ollama for local text embeddings, with support for custom external embedding services.

## Overview

The Lance vector database integration provides:

- **High-performance vector storage** using Lance's columnar format
- **Local Ollama integration** for text embeddings (default, no external dependencies)
- **Custom embedding service support** for advanced use cases
- **Text embedding support** (images via custom services)
- **Vector similarity search** with configurable parameters
- **Scalable indexing** with IVF_PQ (Inverted File with Product Quantization)
- **Redis-compatible command interface**

## Architecture

```
┌─────────────────┐    ┌──────────────────┐    ┌─────────────────┐
│   HeroDB        │    │   External       │    │   Lance         │
│   Redis Server  │◄──►│   Embedding      │    │   Vector Store  │
│                 │    │   Service        │    │                 │
└─────────────────┘    └──────────────────┘    └─────────────────┘
        │                       │                       │
        │                       │                       │
   Redis Protocol          HTTP API              Arrow/Parquet
   Commands                JSON Requests         Columnar Storage
```

### Key Components

1. **Lance Store**: High-performance columnar vector storage
2. **Ollama Integration**: Local embedding service (default)
3. **Custom Embedding Service**: Optional HTTP API for advanced use cases
4. **Redis Command Interface**: Familiar Redis-style commands
5. **Arrow Schema**: Flexible schema definition for metadata

## Configuration

### Default Setup (Ollama)

HeroDB uses Ollama by default for text embeddings. No configuration is required if Ollama is running locally:

```bash
# Install Ollama (if not already installed)
# Visit: https://ollama.ai

# Pull the embedding model
ollama pull nomic-embed-text

# Ollama automatically runs on localhost:11434
# HeroDB will use this by default
```

**Default Configuration:**
- **URL**: `http://localhost:11434`
- **Model**: `nomic-embed-text`
- **Dimensions**: 768 (for nomic-embed-text)

### Custom Embedding Service (Optional)

To use a custom embedding service instead of Ollama:

```bash
# Set custom embedding service URL
redis-cli HSET config:core:aiembed url "http://your-embedding-service:8080/embed"

# Optional: Set authentication if required
redis-cli HSET config:core:aiembed token "your-api-token"
```

### Embedding Service API Contracts

#### Ollama API (Default)
HeroDB calls Ollama using this format:

```bash
POST http://localhost:11434/api/embeddings
Content-Type: application/json

{
  "model": "nomic-embed-text",
  "prompt": "Your text to embed"
}
```

Response:
```json
{
  "embedding": [0.1, 0.2, 0.3, ...]
}
```

#### Custom Service API
Your custom embedding service should accept POST requests with this JSON format:

```json
{
  "texts": ["text1", "text2"],                  // Optional: array of texts
  "images": ["base64_image1", "base64_image2"], // Optional: base64 encoded images
  "model": "your-model-name"                    // Optional: model specification
}
```

And return responses in this format:

```json
{
  "embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]], // Array of embedding vectors
  "model": "model-name",                             // Model used
  "usage": {                                         // Optional usage stats
    "tokens": 100,
    "requests": 2
  }
}
```
## Commands Reference

### Dataset Management

#### LANCE CREATE
Create a new vector dataset with specified dimensions and optional schema.

```bash
LANCE CREATE <dataset> DIM <dimension> [SCHEMA field:type ...]
```

**Parameters:**
- `dataset`: Name of the dataset
- `dimension`: Vector dimension (e.g., 384, 768, 1536)
- `field:type`: Optional metadata fields (string, int, float, bool)

**Examples:**
```bash
# Create a simple dataset for 384-dimensional vectors
LANCE CREATE documents DIM 384

# Create dataset with metadata schema
LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool
```

#### LANCE LIST
List all available datasets.

```bash
LANCE LIST
```

**Returns:** Array of dataset names

#### LANCE INFO
Get information about a specific dataset.

```bash
LANCE INFO <dataset>
```

**Returns:** Dataset metadata including name, version, row count, and schema

#### LANCE DROP
Delete a dataset and all its data.

```bash
LANCE DROP <dataset>
```

### Data Operations

#### LANCE STORE
Store multimodal data (text/images) with automatic embedding generation.

```bash
LANCE STORE <dataset> [TEXT <text>] [IMAGE <base64>] [key value ...]
```

**Parameters:**
- `dataset`: Target dataset name
- `TEXT`: Text content to embed
- `IMAGE`: Base64-encoded image to embed
- `key value`: Metadata key-value pairs

**Examples:**
```bash
# Store text with metadata
LANCE STORE documents TEXT "Machine learning is transforming industries" category "AI" author "John Doe"

# Store image with metadata
LANCE STORE images IMAGE "iVBORw0KGgoAAAANSUhEUgAA..." category "nature" tags "landscape,mountains"

# Store both text and image
LANCE STORE multimodal TEXT "Beautiful sunset" IMAGE "base64data..." location "California"
```

**Returns:** Unique ID of the stored item

### Search Operations

#### LANCE SEARCH
Search using a raw vector.

```bash
LANCE SEARCH <dataset> VECTOR <vector> K <k> [NPROBES <n>] [REFINE <r>]
```

**Parameters:**
- `dataset`: Dataset to search
- `vector`: Comma-separated vector values (e.g., "0.1,0.2,0.3")
- `k`: Number of results to return
- `NPROBES`: Number of partitions to search (optional)
- `REFINE`: Refine factor for better accuracy (optional)

**Example:**
```bash
LANCE SEARCH documents VECTOR "0.1,0.2,0.3,0.4" K 5 NPROBES 10
```

#### LANCE SEARCH.TEXT
Search using text query (automatically embedded).

```bash
LANCE SEARCH.TEXT <dataset> <query_text> K <k> [NPROBES <n>] [REFINE <r>]
```

**Parameters:**
- `dataset`: Dataset to search
- `query_text`: Text query to search for
- `k`: Number of results to return
- `NPROBES`: Number of partitions to search (optional)
- `REFINE`: Refine factor for better accuracy (optional)

**Example:**
```bash
LANCE SEARCH.TEXT documents "artificial intelligence applications" K 10 NPROBES 20
```

**Returns:** Array of results with distance scores and metadata

### Embedding Operations

#### LANCE EMBED.TEXT
Generate embeddings for text without storing.

```bash
LANCE EMBED.TEXT <text1> [text2] [text3] ...
```

**Example:**
```bash
LANCE EMBED.TEXT "Hello world" "Machine learning" "Vector database"
```

**Returns:** Array of embedding vectors

### Index Management

#### LANCE CREATE.INDEX
Create a vector index for faster search performance.

```bash
LANCE CREATE.INDEX <dataset> <index_type> [PARTITIONS <n>] [SUBVECTORS <n>]
```

**Parameters:**
- `dataset`: Dataset to index
- `index_type`: Index type (currently supports "IVF_PQ")
- `PARTITIONS`: Number of partitions (default: 256)
- `SUBVECTORS`: Number of sub-vectors for PQ (default: 16)

**Example:**
```bash
LANCE CREATE.INDEX documents IVF_PQ PARTITIONS 512 SUBVECTORS 32
```
## Usage Patterns

### 1. Document Search System

```bash
# Setup
LANCE CREATE documents DIM 384 SCHEMA title:string content:string category:string

# Store documents
LANCE STORE documents TEXT "Introduction to machine learning algorithms" title "ML Basics" category "education"
LANCE STORE documents TEXT "Deep learning neural networks explained" title "Deep Learning" category "education"
LANCE STORE documents TEXT "Building scalable web applications" title "Web Dev" category "programming"

# Create index for better performance
LANCE CREATE.INDEX documents IVF_PQ PARTITIONS 256

# Search
LANCE SEARCH.TEXT documents "neural networks" K 5
```

### 2. Image Similarity Search

```bash
# Setup
LANCE CREATE images DIM 512 SCHEMA filename:string tags:string

# Store images (base64 encoded)
LANCE STORE images IMAGE "iVBORw0KGgoAAAANSUhEUgAA..." filename "sunset.jpg" tags "nature,landscape"
LANCE STORE images IMAGE "iVBORw0KGgoAAAANSUhEUgBB..." filename "city.jpg" tags "urban,architecture"

# Search by image
LANCE STORE temp_search IMAGE "query_image_base64..."
# Then use the returned ID to get the embedding and search
```

### 3. Multimodal Content Management

```bash
# Setup
LANCE CREATE content DIM 768 SCHEMA type:string source:string

# Store mixed content
LANCE STORE content TEXT "Product description for smartphone" type "product" source "catalog"
LANCE STORE content IMAGE "product_image_base64..." type "product_image" source "catalog"

# Search across all content types
LANCE SEARCH.TEXT content "smartphone features" K 10
```

## Performance Considerations

### Vector Dimensions
- **384**: Good for general text (e.g., sentence-transformers)
- **768**: Standard for BERT-like models
- **1536**: OpenAI text-embedding-ada-002
- **Higher dimensions**: Better accuracy but slower search

### Index Configuration
- **More partitions**: Better for larger datasets (>100K vectors)
- **More sub-vectors**: Better compression but slower search
- **NPROBES**: Higher values = better accuracy, slower search

### Best Practices

1. **Create indexes** for datasets with >1000 vectors
2. **Use appropriate dimensions** based on your embedding model
3. **Configure NPROBES** based on accuracy vs. speed requirements
4. **Batch operations** when possible for better performance
5. **Monitor embedding service** response times and rate limits

## Error Handling

Common error scenarios and solutions:

### Embedding Service Errors
```bash
# Error: Embedding service not configured
ERR Embedding service URL not configured. Set it with: HSET config:core:aiembed url <YOUR_EMBEDDING_SERVICE_URL>

# Error: Service unavailable
ERR Embedding service returned error 404 Not Found
```

**Solution:** Ensure the embedding service is running and the URL is correct.

### Dataset Errors
```bash
# Error: Dataset doesn't exist
ERR Dataset 'mydata' does not exist

# Error: Dimension mismatch
ERR Vector dimension mismatch: expected 384, got 768
```

**Solution:** Create the dataset first or check vector dimensions.

### Search Errors
```bash
# Error: Invalid vector format
ERR Invalid vector format

# Error: No index available
ERR No index available for fast search
```

**Solution:** Check the vector format or create an index.

## Integration Examples

### With Python
```python
import redis

r = redis.Redis(host='localhost', port=6379)

# Create dataset
r.execute_command('LANCE', 'CREATE', 'docs', 'DIM', '384')

# Store document
result = r.execute_command('LANCE', 'STORE', 'docs',
                           'TEXT', 'Machine learning tutorial',
                           'category', 'education')
print(f"Stored with ID: {result}")

# Search
results = r.execute_command('LANCE', 'SEARCH.TEXT', 'docs',
                            'machine learning', 'K', '5')
print(f"Search results: {results}")
```

### With Node.js
```javascript
const redis = require('redis');
const client = redis.createClient();
await client.connect(); // node-redis v4 requires an explicit connect

// Create dataset
await client.sendCommand(['LANCE', 'CREATE', 'docs', 'DIM', '384']);

// Store document
const id = await client.sendCommand(['LANCE', 'STORE', 'docs',
  'TEXT', 'Deep learning guide',
  'category', 'AI']);

// Search
const results = await client.sendCommand(['LANCE', 'SEARCH.TEXT', 'docs',
  'deep learning', 'K', '10']);
```

## Monitoring and Maintenance

### Health Checks
```bash
# Check if the Lance store is available
LANCE LIST

# Check dataset health
LANCE INFO mydataset

# Test the embedding service
LANCE EMBED.TEXT "test"
```

### Maintenance Operations
```bash
# Backup: Use standard Redis backup procedures
# The Lance data is stored separately in the data directory

# Cleanup: Remove unused datasets
LANCE DROP old_dataset

# Reindex: Drop and recreate indexes if needed
LANCE DROP dataset_name
LANCE CREATE dataset_name DIM 384
# Re-import data
LANCE CREATE.INDEX dataset_name IVF_PQ
```

This integration provides a powerful foundation for building AI-powered applications with vector search capabilities while maintaining the familiar Redis interface.
141 docs/rpc_examples.md (new file)
@@ -0,0 +1,141 @@

# HeroDB JSON-RPC Examples

These examples show full JSON-RPC 2.0 payloads for managing HeroDB via the RPC API (enable with `--enable-rpc`). Methods are named as `hero_<function>`. Params are positional arrays; enum values are strings (e.g., `"Redb"`). Copy-paste into Postman or similar clients.
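If you prefer the shell over Postman, a minimal sketch for sending these payloads with `curl` (assuming the server was started with `--enable-rpc` and the default RPC port 8080; the `rpc` helper is hypothetical):

```bash
# Hypothetical helper: POST a JSON-RPC 2.0 payload to a local HeroDB RPC server
rpc() {
  curl -s -X POST http://localhost:8080 \
    -H 'Content-Type: application/json' \
    -d "$1"
}

rpc '{"jsonrpc":"2.0","id":3,"method":"hero_listDatabases","params":[]}'
```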
## Database Management

### Create Database
Creates a new database with an optional per-database encryption key (stored write-only in Admin DB 0).

```json
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "hero_createDatabase",
  "params": [
    "Redb",
    { "name": null, "storage_path": null, "max_size": null, "redis_version": null },
    null
  ]
}
```

With encryption:
```json
{
  "jsonrpc": "2.0",
  "id": 2,
  "method": "hero_createDatabase",
  "params": [
    "Sled",
    { "name": "secure-db", "storage_path": null, "max_size": null, "redis_version": null },
    "my-per-db-encryption-key"
  ]
}
```

### List Databases
Returns an array of database infos (id, backend, encrypted status, size, etc.).

```json
{
  "jsonrpc": "2.0",
  "id": 3,
  "method": "hero_listDatabases",
  "params": []
}
```

### Get Database Info
Retrieves detailed info for a specific database.

```json
{
  "jsonrpc": "2.0",
  "id": 4,
  "method": "hero_getDatabaseInfo",
  "params": [1]
}
```

### Delete Database
Removes the physical database file; metadata remains in Admin DB 0.

```json
{
  "jsonrpc": "2.0",
  "id": 5,
  "method": "hero_deleteDatabase",
  "params": [1]
}
```

## Access Control

### Add Access Key
Adds a hashed access key for private databases. Permissions: `"read"` or `"readwrite"`.

```json
{
  "jsonrpc": "2.0",
  "id": 6,
  "method": "hero_addAccessKey",
  "params": [2, "my-access-key", "readwrite"]
}
```

### List Access Keys
Returns an array of key hashes, permissions, and creation timestamps.

```json
{
  "jsonrpc": "2.0",
  "id": 7,
  "method": "hero_listAccessKeys",
  "params": [2]
}
```

### Delete Access Key
Removes a key by its SHA-256 hash.

```json
{
  "jsonrpc": "2.0",
  "id": 8,
  "method": "hero_deleteAccessKey",
  "params": [2, "0123abcd...keyhash..."]
}
```

### Set Database Public/Private
Toggles public access (default: true). Private databases require access keys.

```json
{
  "jsonrpc": "2.0",
  "id": 9,
  "method": "hero_setDatabasePublic",
  "params": [2, false]
}
```

## Server Info

### Get Server Stats
Returns stats such as total databases and uptime.

```json
{
  "jsonrpc": "2.0",
  "id": 10,
  "method": "hero_getServerStats",
  "params": []
}
```

## Notes
- Per-database encryption keys are write-only; they are set at creation and used transparently.
- Access keys are hashed (SHA-256) for storage; provide the plaintext in requests.
- Backend options: `"Redb"` (default) or `"Sled"`.
- Config object fields (name, storage_path, etc.) are optional and currently ignored, but positional.
253 docs/tantivy.md (new file)
@@ -0,0 +1,253 @@

# Tantivy Full‑Text Backend (JSON‑RPC)

This document explains how to use HeroDB’s Tantivy-backed full‑text search as a dedicated database backend and provides copy‑pasteable JSON‑RPC requests. Tantivy is available only for non‑admin databases (db_id >= 1). Admin DB 0 always uses Redb/Sled and rejects FT operations.

Important characteristics:
- Tantivy is a third backend alongside Redb and Sled. It provides search indexes only; there is no KV store backing it.
- On Tantivy databases, Redis KV/list/hash commands are rejected; only FT commands and basic control (SELECT, CLIENT, INFO, etc.) are allowed.
- FT JSON‑RPC is namespaced as "herodb" and methods are named with underscores: herodb_ftCreate, herodb_ftAdd, herodb_ftSearch, herodb_ftDel, herodb_ftInfo, herodb_ftDrop.

Reference to server implementation:
- RPC methods are defined in [rust.trait Rpc()](src/rpc.rs:70):
  - [rust.fn ft_create()](src/rpc.rs:121)
  - [rust.fn ft_add()](src/rpc.rs:130)
  - [rust.fn ft_search()](src/rpc.rs:141)
  - [rust.fn ft_del()](src/rpc.rs:154)
  - [rust.fn ft_info()](src/rpc.rs:158)
  - [rust.fn ft_drop()](src/rpc.rs:162)

Notes on responses:
- ftCreate/ftAdd/ftDel/ftDrop return a JSON boolean: true on success.
- ftSearch/ftInfo return a JSON object with a single key "resp" containing a RESP‑encoded string (the wire format used by Redis). You can display or parse it on the client side as needed.
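For example, extracting the raw RESP payload of a search from the shell (a sketch assuming the RPC server listens on port 8080 and `jq` is installed):

```bash
curl -s -X POST http://localhost:8080 \
  -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":4,"method":"herodb_ftSearch","params":[1,"product_catalog","wireless",null,10,0,null]}' \
  | jq -r '.result.resp'
```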
RESP usage (redis-cli):
|
||||
- For RESP clients, you must SELECT the Tantivy database first. SELECT now succeeds for Tantivy DBs without opening KV storage.
|
||||
- After SELECT, you can run FT.* commands within that DB context.
|
||||
|
||||
Example with redis-cli:
|
||||
```bash
|
||||
# Connect to server
|
||||
redis-cli -p 6379
|
||||
|
||||
# Select Tantivy DB 1 (public by default)
|
||||
SELECT 1
|
||||
# → OK
|
||||
|
||||
# Create index
|
||||
FT.CREATE product_catalog SCHEMA title TEXT description TEXT category TAG price NUMERIC rating NUMERIC location GEO
|
||||
# → OK
|
||||
|
||||
# Add a document
|
||||
FT.ADD product_catalog product:1 1.0 title "Wireless Bluetooth Headphones" description "Premium noise-canceling headphones with 30-hour battery life" category "electronics,audio" price 299.99 rating 4.5 location "-122.4194,37.7749"
|
||||
# → OK
|
||||
|
||||
# Search
|
||||
FT.SEARCH product_catalog wireless LIMIT 0 3
|
||||
# → RESP array with hits
|
||||
```
|
||||
|
||||
Storage layout (on disk):
|
||||
- Indices are stored per database under:
|
||||
- <base_dir>/search_indexes/<db_id>/<index_name>
|
||||
- Example: /tmp/test/search_indexes/1/product_catalog
|
||||
|
||||
0) Create a new Tantivy database
|
||||
|
||||
Use herodb_createDatabase with backend "Tantivy". DB 0 cannot be Tantivy.
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 1,
|
||||
"method": "herodb_createDatabase",
|
||||
"params": [
|
||||
"Tantivy",
|
||||
{ "name": "search-db", "storage_path": null, "max_size": null, "redis_version": null },
|
||||
null
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The response contains the allocated db_id (>= 1). Use that id in the calls below.
|
||||
|
||||
1) FT.CREATE — create an index with schema
|
||||
|
||||
Method: herodb_ftCreate → [rust.fn ft_create()](src/rpc.rs:121)
|
||||
|
||||
Schema format is an array of tuples: [ [field_name, field_type, [options...] ], ... ]
|
||||
Supported field types: "TEXT", "NUMERIC" (defaults to F64), "TAG", "GEO"
|
||||
Supported options (subset): "WEIGHT", "SORTABLE", "NOINDEX", "SEPARATOR", "CASESENSITIVE"
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 2,
|
||||
"method": "herodb_ftCreate",
|
||||
"params": [
|
||||
1,
|
||||
"product_catalog",
|
||||
[
|
||||
["title", "TEXT", ["SORTABLE"]],
|
||||
["description", "TEXT", []],
|
||||
["category", "TAG", ["SEPARATOR", ","]],
|
||||
["price", "NUMERIC", ["SORTABLE"]],
|
||||
["rating", "NUMERIC", []],
|
||||
["location", "GEO", []]
|
||||
]
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Returns: true on success.
|
||||
|
||||
2) FT.ADD — add or replace a document
|
||||
|
||||
Method: herodb_ftAdd → [rust.fn ft_add()](src/rpc.rs:130)
|
||||
|
||||
Fields is an object (map) of field_name → value (all values are sent as strings). GEO expects "lat,lon".
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 3,
|
||||
"method": "herodb_ftAdd",
|
||||
"params": [
|
||||
1,
|
||||
"product_catalog",
|
||||
"product:1",
|
||||
1.0,
|
||||
{
|
||||
"title": "Wireless Bluetooth Headphones",
|
||||
"description": "Premium noise-canceling headphones with 30-hour battery life",
|
||||
"category": "electronics,audio",
|
||||
"price": "299.99",
|
||||
"rating": "4.5",
|
||||
"location": "-122.4194,37.7749"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Returns: true on success.
|
||||
|
||||
3) FT.SEARCH — query an index
|
||||
|
||||
Method: herodb_ftSearch → [rust.fn ft_search()](src/rpc.rs:141)
|
||||
|
||||
Parameters: (db_id, index_name, query, filters?, limit?, offset?, return_fields?)
|
||||
- filters: array of [field, value] pairs (Equals filter)
|
||||
- limit/offset: numbers (defaults: limit=10, offset=0)
|
||||
- return_fields: array of field names to include (optional)
|
||||
|
||||
Simple query:
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 4,
|
||||
"method": "herodb_ftSearch",
|
||||
"params": [1, "product_catalog", "wireless", null, 10, 0, null]
|
||||
}
|
||||
```
|
||||
|
||||
Pagination + filters + selected fields:
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 5,
|
||||
"method": "herodb_ftSearch",
|
||||
"params": [
|
||||
1,
|
||||
"product_catalog",
|
||||
"mouse",
|
||||
[["category", "electronics"]],
|
||||
5,
|
||||
0,
|
||||
["title", "price", "rating"]
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Response shape:
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 5,
|
||||
"result": { "resp": "*...RESP encoded array..." }
|
||||
}
|
||||
```
|
4) FT.INFO — index metadata

Method: herodb_ftInfo → [rust.fn ft_info()](src/rpc.rs:158)

```json
{
  "jsonrpc": "2.0",
  "id": 6,
  "method": "herodb_ftInfo",
  "params": [1, "product_catalog"]
}
```

Response shape:
```json
{
  "jsonrpc": "2.0",
  "id": 6,
  "result": { "resp": "*...RESP encoded array with fields and counts..." }
}
```

5) FT.DEL — delete by doc id

Method: herodb_ftDel → [rust.fn ft_del()](src/rpc.rs:154)

```json
{
  "jsonrpc": "2.0",
  "id": 7,
  "method": "herodb_ftDel",
  "params": [1, "product_catalog", "product:1"]
}
```

Returns: true on success. Note: the current implementation logs the request and reports success; the physical delete may be a no-op until deletion is finalized in the engine.

6) FT.DROP — drop an index

Method: herodb_ftDrop → [rust.fn ft_drop()](src/rpc.rs:162)

```json
{
  "jsonrpc": "2.0",
  "id": 8,
  "method": "herodb_ftDrop",
  "params": [1, "product_catalog"]
}
```

Returns: true on success.

Field types and options

- TEXT: stored/indexed/tokenized text. "SORTABLE" marks the field as fast (stored + fast path in our wrapper).
- NUMERIC: stored/indexed numeric; default precision F64. "SORTABLE" enables a fast column.
- TAG: exact-match terms. Options: "SEPARATOR" (default ","), "CASESENSITIVE" (default false).
- GEO: a "lat,lon" string; stored as two numeric fields internally.

Backend and permission gating

- FT methods are rejected on DB 0.
- FT methods require the database backend to be Tantivy; otherwise the RPC returns an error (illustrated below).
- Write-like FT methods (create/add/del/drop) follow the same permission model as Redis writes on selected databases.
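When one of these constraints is violated, the server replies with a standard JSON-RPC error object. An illustrative reply for an FT call against DB 0 (the message text matches the troubleshooting entry below; the numeric code is an assumption, since servers commonly use the -32000 range for application errors):

```json
{
  "jsonrpc": "2.0",
  "id": 9,
  "error": { "code": -32000, "message": "FT not allowed on DB 0" }
}
```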
Troubleshooting

- "DB backend is not Tantivy": ensure the database was created with backend "Tantivy".
- "FT not allowed on DB 0": use a non-admin database id (>= 1).
- Empty search results: confirm that the queried fields are tokenized/indexed (TEXT) and that documents were added successfully.

Related docs

- Command-level search overview: [docs/search.md](docs/search.md:1)
- RPC definitions: [src/rpc.rs](src/rpc.rs:1)
@@ -1,191 +1,6 @@
# HeroDB Examples
# HeroDB Tantivy Search Examples

This directory contains examples demonstrating HeroDB's capabilities, including full-text search powered by Tantivy and vector database operations using Lance.

## Available Examples

1. **[Tantivy Search Demo](#tantivy-search-demo-bash-script)** - Full-text search capabilities
2. **[Lance Vector Database Demo](#lance-vector-database-demo-bash-script)** - Vector database and AI operations
3. **[AGE Encryption Demo](age_bash_demo.sh)** - Cryptographic operations
4. **[Simple Demo](simple_demo.sh)** - Basic Redis operations

---

## Lance Vector Database Demo (Bash Script)

### Overview
The `lance_vector_demo.sh` script provides a comprehensive demonstration of HeroDB's vector database capabilities using Lance. It showcases vector storage, similarity search, multimodal data handling, and AI-powered operations with external embedding services.

### Prerequisites
1. **HeroDB Server**: The server must be running (default port 6379)
2. **Redis CLI**: The `redis-cli` tool must be installed and available in your PATH
3. **Embedding Service** (optional): For full functionality, set up an external embedding service

### Running the Demo

#### Step 1: Start HeroDB Server
```bash
# From the project root directory
cargo run -- --dir ./test_data --port 6379
```

#### Step 2: Run the Demo (in a new terminal)
```bash
# From the project root directory
./examples/lance_vector_demo.sh
```

### What the Demo Covers

The script demonstrates comprehensive vector database operations:

1. **Dataset Management**
   - Creating vector datasets with custom dimensions
   - Defining schemas with metadata fields
   - Listing and inspecting datasets
   - Dataset information and statistics

2. **Embedding Operations**
   - Text embedding generation via external services
   - Multimodal embedding support (text + images)
   - Batch embedding operations

3. **Data Storage**
   - Storing text documents with automatic embedding
   - Storing images with metadata
   - Multimodal content storage
   - Rich metadata support

4. **Vector Search**
   - Similarity search with raw vectors
   - Text-based semantic search
   - Configurable search parameters (K, NPROBES, REFINE)
   - Cross-modal search capabilities

5. **Index Management**
   - Creating IVF_PQ indexes for performance
   - Custom index parameters
   - Performance optimization

6. **Advanced Features**
   - Error handling and recovery
   - Performance testing concepts
   - Monitoring and maintenance
   - Cleanup operations

### Key Lance Commands Demonstrated

#### Dataset Management
```bash
# Create vector dataset
LANCE CREATE documents DIM 384

# Create dataset with schema
LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool

# List datasets
LANCE LIST

# Get dataset information
LANCE INFO documents
```

#### Data Operations
```bash
# Store text with metadata
LANCE STORE documents TEXT "Machine learning tutorial" category "education" author "John Doe"

# Store image with metadata
LANCE STORE images IMAGE "base64_encoded_image..." filename "photo.jpg" tags "nature,landscape"

# Store multimodal content
LANCE STORE content TEXT "Product description" IMAGE "base64_image..." type "product"
```

#### Search Operations
```bash
# Search with raw vector
LANCE SEARCH documents VECTOR "0.1,0.2,0.3,0.4" K 5

# Semantic text search
LANCE SEARCH.TEXT documents "artificial intelligence" K 10 NPROBES 20

# Generate embeddings
LANCE EMBED.TEXT "Hello world" "Machine learning"
```

#### Index Management
```bash
# Create performance index
LANCE CREATE.INDEX documents IVF_PQ PARTITIONS 256 SUBVECTORS 16

# Drop dataset
LANCE DROP old_dataset
```

### Configuration

#### Setting Up Embedding Service
```bash
# Configure embedding service URL
redis-cli HSET config:core:aiembed url "http://your-embedding-service:8080/embed"

# Optional: Set authentication token
redis-cli HSET config:core:aiembed token "your-api-token"
```

#### Embedding Service API
Your embedding service should accept POST requests:
```json
{
  "texts": ["text1", "text2"],
  "images": ["base64_image1", "base64_image2"],
  "model": "your-model-name"
}
```

And return responses:
```json
{
  "embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
  "model": "model-name",
  "usage": {"tokens": 100, "requests": 2}
}
```
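Before pointing HeroDB at your service, you can smoke-test this contract from the shell. A minimal sketch, assuming the service runs at `http://localhost:8080/embed` (substitute your real host and port):

```bash
# Hypothetical endpoint; a conforming service returns an "embeddings" array.
curl -s -X POST http://localhost:8080/embed \
  -H 'Content-Type: application/json' \
  -d '{"texts": ["hello world"], "model": "your-model-name"}'
```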
### Interactive Features

The demo script includes:
- **Colored output** for better readability
- **Step-by-step execution** with explanations
- **Error handling** demonstrations
- **Automatic cleanup** options
- **Performance testing** concepts
- **Real-world usage** examples

### Use Cases Demonstrated

1. **Document Search System**
   - Semantic document retrieval
   - Metadata filtering
   - Relevance ranking

2. **Image Similarity Search**
   - Visual content matching
   - Tag-based filtering
   - Multimodal queries

3. **Product Recommendations**
   - Feature-based similarity
   - Category filtering
   - Price range queries

4. **Content Management**
   - Mixed media storage
   - Cross-modal search
   - Rich metadata support

---
This directory contains examples demonstrating HeroDB's full-text search capabilities powered by Tantivy.

## Tantivy Search Demo (Bash Script)

@@ -1,426 +0,0 @@
#!/bin/bash

# Lance Vector Database Demo Script
# This script demonstrates all Lance vector database operations in HeroDB

set -e  # Exit on any error

# Configuration
REDIS_HOST="localhost"
REDIS_PORT="6379"
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Helper functions
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

execute_command() {
    local cmd="$1"
    local description="$2"

    echo
    log_info "Executing: $description"
    echo "Command: $cmd"

    if result=$($cmd 2>&1); then
        log_success "Result: $result"
    else
        log_error "Failed: $result"
        return 1
    fi
}

# Check if HeroDB is running
check_herodb() {
    log_info "Checking if HeroDB is running..."
    if ! $REDIS_CLI ping > /dev/null 2>&1; then
        log_error "HeroDB is not running. Please start it first:"
        echo "  cargo run -- --dir ./test_data --port $REDIS_PORT"
        exit 1
    fi
    log_success "HeroDB is running"
}

# Setup embedding service configuration
setup_embedding_service() {
    log_info "Setting up embedding service configuration..."

    # Note: This is a mock URL for demonstration
    # In production, replace with your actual embedding service
    execute_command \
        "$REDIS_CLI HSET config:core:aiembed url 'http://localhost:8080/embed'" \
        "Configure embedding service URL"

    # Optional: Set authentication token
    # execute_command \
    #     "$REDIS_CLI HSET config:core:aiembed token 'your-api-token'" \
    #     "Configure embedding service token"

    log_warning "Note: Embedding service at http://localhost:8080/embed is not running."
    log_warning "Some operations will fail, but this demonstrates the command structure."
}

# Dataset Management Operations
demo_dataset_management() {
    echo
    echo "=========================================="
    echo "       DATASET MANAGEMENT DEMO"
    echo "=========================================="

    # List datasets (should be empty initially)
    execute_command \
        "$REDIS_CLI LANCE LIST" \
        "List all datasets (initially empty)"

    # Create a simple dataset
    execute_command \
        "$REDIS_CLI LANCE CREATE documents DIM 384" \
        "Create a simple document dataset with 384 dimensions"

    # Create a dataset with schema
    execute_command \
        "$REDIS_CLI LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool description:string" \
        "Create products dataset with custom schema"

    # Create an image dataset
    execute_command \
        "$REDIS_CLI LANCE CREATE images DIM 512 SCHEMA filename:string tags:string width:int height:int" \
        "Create images dataset for multimodal content"

    # List datasets again
    execute_command \
        "$REDIS_CLI LANCE LIST" \
        "List all datasets (should show 3 datasets)"

    # Get info about datasets
    execute_command \
        "$REDIS_CLI LANCE INFO documents" \
        "Get information about documents dataset"

    execute_command \
        "$REDIS_CLI LANCE INFO products" \
        "Get information about products dataset"
}

# Embedding Operations
demo_embedding_operations() {
    echo
    echo "=========================================="
    echo "       EMBEDDING OPERATIONS DEMO"
    echo "=========================================="

    log_warning "The following operations will fail because no embedding service is running."
    log_warning "This demonstrates the command structure and error handling."

    # Try to embed text (will fail without embedding service)
    execute_command \
        "$REDIS_CLI LANCE EMBED.TEXT 'Hello world'" \
        "Generate embedding for single text" || true

    # Try to embed multiple texts
    execute_command \
        "$REDIS_CLI LANCE EMBED.TEXT 'Machine learning' 'Artificial intelligence' 'Deep learning'" \
        "Generate embeddings for multiple texts" || true
}

# Data Storage Operations
demo_data_storage() {
    echo
    echo "=========================================="
    echo "       DATA STORAGE DEMO"
    echo "=========================================="

    log_warning "Storage operations will fail without embedding service, but show command structure."

    # Store text documents
    execute_command \
        "$REDIS_CLI LANCE STORE documents TEXT 'Introduction to machine learning algorithms and their applications in modern AI systems' category 'education' author 'John Doe' difficulty 'beginner'" \
        "Store a document with text and metadata" || true

    execute_command \
        "$REDIS_CLI LANCE STORE documents TEXT 'Deep learning neural networks for computer vision tasks' category 'research' author 'Jane Smith' difficulty 'advanced'" \
        "Store another document" || true

    # Store product information
    execute_command \
        "$REDIS_CLI LANCE STORE products TEXT 'High-performance laptop with 16GB RAM and SSD storage' category 'electronics' price '1299.99' available 'true'" \
        "Store product with text description" || true

    # Store image with metadata (using placeholder base64)
    execute_command \
        "$REDIS_CLI LANCE STORE images IMAGE 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==' filename 'sample.png' tags 'test,demo' width '1' height '1'" \
        "Store image with metadata (1x1 pixel PNG)" || true

    # Store multimodal content
    execute_command \
        "$REDIS_CLI LANCE STORE images TEXT 'Beautiful sunset over mountains' IMAGE 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==' filename 'sunset.png' tags 'nature,landscape' location 'California'" \
        "Store multimodal content (text + image)" || true
}

# Search Operations
demo_search_operations() {
    echo
    echo "=========================================="
    echo "       SEARCH OPERATIONS DEMO"
    echo "=========================================="

    log_warning "Search operations will fail without data, but show command structure."

    # Search with raw vector
    execute_command \
        "$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 5" \
        "Search with raw vector (5 results)" || true

    # Search with vector and parameters
    execute_command \
        "$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 10 NPROBES 20 REFINE 2" \
        "Search with vector and advanced parameters" || true

    # Text-based search
    execute_command \
        "$REDIS_CLI LANCE SEARCH.TEXT documents 'machine learning algorithms' K 5" \
        "Search using text query" || true

    # Text search with parameters
    execute_command \
        "$REDIS_CLI LANCE SEARCH.TEXT products 'laptop computer' K 3 NPROBES 10" \
        "Search products using text with parameters" || true

    # Search in image dataset
    execute_command \
        "$REDIS_CLI LANCE SEARCH.TEXT images 'sunset landscape' K 5" \
        "Search images using text description" || true
}

# Index Management Operations
demo_index_management() {
    echo
    echo "=========================================="
    echo "       INDEX MANAGEMENT DEMO"
    echo "=========================================="

    # Create indexes for better search performance
    execute_command \
        "$REDIS_CLI LANCE CREATE.INDEX documents IVF_PQ" \
        "Create default IVF_PQ index for documents"

    execute_command \
        "$REDIS_CLI LANCE CREATE.INDEX products IVF_PQ PARTITIONS 512 SUBVECTORS 32" \
        "Create IVF_PQ index with custom parameters for products"

    execute_command \
        "$REDIS_CLI LANCE CREATE.INDEX images IVF_PQ PARTITIONS 256 SUBVECTORS 16" \
        "Create IVF_PQ index for images dataset"

    log_success "Indexes created successfully"
}

# Advanced Usage Examples
demo_advanced_usage() {
    echo
    echo "=========================================="
    echo "       ADVANCED USAGE EXAMPLES"
    echo "=========================================="

    # Create a specialized dataset for semantic search
    execute_command \
        "$REDIS_CLI LANCE CREATE semantic_search DIM 1536 SCHEMA title:string content:string url:string timestamp:string source:string" \
        "Create dataset for semantic search with rich metadata"

    # Demonstrate batch operations concept
    log_info "Batch operations example (would store multiple items):"
    echo "  for doc in documents:"
    echo "    LANCE STORE semantic_search TEXT \"\$doc_content\" title \"\$title\" url \"\$url\""

    # Show monitoring commands
    log_info "Monitoring and maintenance commands:"
    execute_command \
        "$REDIS_CLI LANCE LIST" \
        "List all datasets for monitoring"

    # Show dataset statistics
    for dataset in documents products images semantic_search; do
        execute_command \
            "$REDIS_CLI LANCE INFO $dataset" \
            "Get statistics for $dataset" || true
    done
}

# Cleanup Operations
demo_cleanup() {
    echo
    echo "=========================================="
    echo "       CLEANUP OPERATIONS DEMO"
    echo "=========================================="

    log_info "Demonstrating cleanup operations..."

    # Drop individual datasets
    execute_command \
        "$REDIS_CLI LANCE DROP semantic_search" \
        "Drop semantic_search dataset"

    # List remaining datasets
    execute_command \
        "$REDIS_CLI LANCE LIST" \
        "List remaining datasets"

    # Ask user if they want to clean up all test data
    echo
    read -p "Do you want to clean up all test datasets? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        execute_command \
            "$REDIS_CLI LANCE DROP documents" \
            "Drop documents dataset"

        execute_command \
            "$REDIS_CLI LANCE DROP products" \
            "Drop products dataset"

        execute_command \
            "$REDIS_CLI LANCE DROP images" \
            "Drop images dataset"

        execute_command \
            "$REDIS_CLI LANCE LIST" \
            "Verify all datasets are cleaned up"

        log_success "All test datasets cleaned up"
    else
        log_info "Keeping test datasets for further experimentation"
    fi
}

# Error Handling Demo
demo_error_handling() {
    echo
    echo "=========================================="
    echo "       ERROR HANDLING DEMO"
    echo "=========================================="

    log_info "Demonstrating various error conditions..."

    # Try to access non-existent dataset
    execute_command \
        "$REDIS_CLI LANCE INFO nonexistent_dataset" \
        "Try to get info for non-existent dataset" || true

    # Try to search non-existent dataset
    execute_command \
        "$REDIS_CLI LANCE SEARCH nonexistent_dataset VECTOR '0.1,0.2' K 5" \
        "Try to search non-existent dataset" || true

    # Try to drop non-existent dataset
    execute_command \
        "$REDIS_CLI LANCE DROP nonexistent_dataset" \
        "Try to drop non-existent dataset" || true

    # Try invalid vector format
    execute_command \
        "$REDIS_CLI LANCE SEARCH documents VECTOR 'invalid,vector,format' K 5" \
        "Try search with invalid vector format" || true

    log_info "Error handling demonstration complete"
}

# Performance Testing Demo
demo_performance_testing() {
    echo
    echo "=========================================="
    echo "       PERFORMANCE TESTING DEMO"
    echo "=========================================="

    log_info "Creating performance test dataset..."
    execute_command \
        "$REDIS_CLI LANCE CREATE perf_test DIM 128 SCHEMA batch_id:string item_id:string" \
        "Create performance test dataset"

    log_info "Performance testing would involve:"
    echo "  1. Bulk loading thousands of vectors"
    echo "  2. Creating indexes with different parameters"
    echo "  3. Measuring search latency with various K values"
    echo "  4. Testing different NPROBES settings"
    echo "  5. Monitoring memory usage"

    log_info "Example performance test commands:"
    echo "  # Test search speed with different parameters"
    echo "  time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10"
    echo "  time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10 NPROBES 50"
    echo "  time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 100 NPROBES 100"

    # Clean up performance test dataset
    execute_command \
        "$REDIS_CLI LANCE DROP perf_test" \
        "Clean up performance test dataset"
}

# Main execution
main() {
    echo "=========================================="
    echo "    LANCE VECTOR DATABASE DEMO SCRIPT"
    echo "=========================================="
    echo
    echo "This script demonstrates all Lance vector database operations."
    echo "Note: Some operations will fail without a running embedding service."
    echo "This is expected and demonstrates error handling."
    echo

    # Check prerequisites
    check_herodb

    # Setup
    setup_embedding_service

    # Run demos
    demo_dataset_management
    demo_embedding_operations
    demo_data_storage
    demo_search_operations
    demo_index_management
    demo_advanced_usage
    demo_error_handling
    demo_performance_testing

    # Cleanup
    demo_cleanup

    echo
    echo "=========================================="
    echo "       DEMO COMPLETE"
    echo "=========================================="
    echo
    log_success "Lance vector database demo completed successfully!"
    echo
    echo "Next steps:"
    echo "1. Set up a real embedding service (OpenAI, Hugging Face, etc.)"
    echo "2. Update the embedding service URL configuration"
    echo "3. Try storing and searching real data"
    echo "4. Experiment with different vector dimensions and index parameters"
    echo "5. Build your AI-powered application!"
    echo
    echo "For more information, see docs/lance_vector_db.md"
}

# Run the demo
main "$@"
489
src/admin_meta.rs
Normal file
489
src/admin_meta.rs
Normal file
@@ -0,0 +1,489 @@
use std::path::PathBuf;
use std::sync::{Arc, OnceLock, Mutex, RwLock};
use std::collections::HashMap;

use crate::error::DBError;
use crate::options;
use crate::rpc::Permissions;
use crate::storage::Storage;
use crate::storage_sled::SledStorage;
use crate::storage_trait::StorageBackend;

// Key builders
fn k_admin_next_id() -> &'static str {
    "admin:next_id"
}
fn k_admin_dbs() -> &'static str {
    "admin:dbs"
}
fn k_meta_db(id: u64) -> String {
    format!("meta:db:{}", id)
}
fn k_meta_db_keys(id: u64) -> String {
    format!("meta:db:{}:keys", id)
}
fn k_meta_db_enc(id: u64) -> String {
    format!("meta:db:{}:enc", id)
}

// Global cache of admin DB 0 handles per base_dir to avoid sled/redb file-lock contention
// and to correctly isolate different test instances with distinct directories.
static ADMIN_STORAGES: OnceLock<RwLock<HashMap<String, Arc<dyn StorageBackend>>>> = OnceLock::new();

// Global registry for data DB storages to avoid double-open across the process.
static DATA_STORAGES: OnceLock<RwLock<HashMap<u64, Arc<dyn StorageBackend>>>> = OnceLock::new();
static DATA_INIT_LOCK: Mutex<()> = Mutex::new(());

fn init_admin_storage(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
) -> Result<Arc<dyn StorageBackend>, DBError> {
    let db_file = PathBuf::from(base_dir).join("0.db");
    if let Some(parent_dir) = db_file.parent() {
        std::fs::create_dir_all(parent_dir).map_err(|e| {
            DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e))
        })?;
    }
    let storage: Arc<dyn StorageBackend> = match backend {
        options::BackendType::Redb => Arc::new(Storage::new(&db_file, true, Some(admin_secret))?),
        options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, true, Some(admin_secret))?),
        options::BackendType::Tantivy => {
            return Err(DBError("Admin DB 0 cannot use Tantivy backend".to_string()))
        }
    };
    Ok(storage)
}

// Get or initialize a cached handle to admin DB 0 per base_dir (thread-safe, no double-open race)
pub fn open_admin_storage(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
) -> Result<Arc<dyn StorageBackend>, DBError> {
    let map = ADMIN_STORAGES.get_or_init(|| RwLock::new(HashMap::new()));
    // Fast path
    if let Some(st) = map.read().unwrap().get(base_dir) {
        return Ok(st.clone());
    }
    // Slow path with write lock
    {
        let mut w = map.write().unwrap();
        if let Some(st) = w.get(base_dir) {
            return Ok(st.clone());
        }

        // Detect the existing 0.db backend from the filesystem, if present.
        let admin_path = PathBuf::from(base_dir).join("0.db");
        let detected = if admin_path.exists() {
            if admin_path.is_file() {
                Some(options::BackendType::Redb)
            } else if admin_path.is_dir() {
                Some(options::BackendType::Sled)
            } else {
                None
            }
        } else {
            None
        };

        let effective_backend = match detected {
            Some(d) if d != backend => {
                eprintln!(
                    "warning: Admin DB 0 at {} appears to be {:?}, but process default is {:?}. Using detected backend.",
                    admin_path.display(),
                    d,
                    backend
                );
                d
            }
            Some(d) => d,
            None => backend, // First boot: use the requested backend to initialize 0.db
        };

        let st = init_admin_storage(base_dir, effective_backend, admin_secret)?;
        w.insert(base_dir.to_string(), st.clone());
        Ok(st)
    }
}

// Ensure admin structures exist in encrypted DB 0
pub fn ensure_bootstrap(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
) -> Result<(), DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;

    // Initialize the next id if missing
    if !admin.exists(k_admin_next_id())? {
        admin.set(k_admin_next_id().to_string(), "1".to_string())?;
    }
    // admin:dbs is a hash; it's fine if it doesn't exist (hlen -> 0)
    Ok(())
}

// Get or initialize a shared handle to a data DB (> 0), avoiding double-open across subsystems
pub fn open_data_storage(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
) -> Result<Arc<dyn StorageBackend>, DBError> {
    if id == 0 {
        return open_admin_storage(base_dir, backend, admin_secret);
    }

    // Validate existence in admin metadata
    if !db_exists(base_dir, backend.clone(), admin_secret, id)? {
        return Err(DBError(format!(
            "Cannot open database instance {}, as that database instance does not exist.",
            id
        )));
    }

    let map = DATA_STORAGES.get_or_init(|| RwLock::new(HashMap::new()));
    // Fast path
    if let Some(st) = map.read().unwrap().get(&id) {
        return Ok(st.clone());
    }

    // Slow path with init lock
    let _guard = DATA_INIT_LOCK.lock().unwrap();
    if let Some(st) = map.read().unwrap().get(&id) {
        return Ok(st.clone());
    }

    // Resolve the effective backend for this db id:
    // 1) Try the admin meta "backend" field
    // 2) If missing, sniff the filesystem (file => Redb, dir => Sled), then persist into admin meta
    // 3) Fall back to the requested 'backend' (startup default) if nothing else is known
    let meta_backend = get_database_backend(base_dir, backend.clone(), admin_secret, id).ok().flatten();
    let db_path = PathBuf::from(base_dir).join(format!("{}.db", id));
    let sniffed_backend = if db_path.exists() {
        if db_path.is_file() {
            Some(options::BackendType::Redb)
        } else if db_path.is_dir() {
            Some(options::BackendType::Sled)
        } else {
            None
        }
    } else {
        None
    };
    let effective_backend = meta_backend.clone().or(sniffed_backend).unwrap_or(backend.clone());

    // If we had to sniff (i.e., meta was missing), persist it for future robustness
    if meta_backend.is_none() {
        let _ = set_database_backend(base_dir, backend.clone(), admin_secret, id, effective_backend.clone());
    }

    // Warn if the caller-provided backend differs from the effective one
    if effective_backend != backend {
        eprintln!(
            "notice: Database {} backend resolved to {:?} (caller requested {:?}). Using resolved backend.",
            id, effective_backend, backend
        );
    }

    // Determine per-db encryption (from admin meta)
    let enc = get_enc_key(base_dir, backend.clone(), admin_secret, id)?;
    let should_encrypt = enc.is_some();

    // Build the database file path and ensure the parent dir exists
    let db_file = PathBuf::from(base_dir).join(format!("{}.db", id));
    if let Some(parent_dir) = db_file.parent() {
        std::fs::create_dir_all(parent_dir).map_err(|e| {
            DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e))
        })?;
    }

    // Open storage using the effective backend
    let storage: Arc<dyn StorageBackend> = match effective_backend {
        options::BackendType::Redb => Arc::new(Storage::new(&db_file, should_encrypt, enc.as_deref())?),
        options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, should_encrypt, enc.as_deref())?),
        options::BackendType::Tantivy => {
            return Err(DBError("Tantivy backend has no KV storage; use FT.* commands only".to_string()))
        }
    };

    // Publish to the registry
    map.write().unwrap().insert(id, storage.clone());
    Ok(storage)
}

// Allocate the next DB id and persist the new pointer
pub fn allocate_next_id(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
) -> Result<u64, DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let cur = admin
        .get(k_admin_next_id())?
        .unwrap_or_else(|| "1".to_string());
    let id: u64 = cur.parse().unwrap_or(1);
    let next = id.checked_add(1).ok_or_else(|| DBError("next_id overflow".into()))?;
    admin.set(k_admin_next_id().to_string(), next.to_string())?;

    // Register into the admin:dbs hash
    let _ = admin.hset(k_admin_dbs(), vec![(id.to_string(), "1".to_string())])?;

    // Default meta for the new db: public true
    let meta_key = k_meta_db(id);
    let _ = admin.hset(&meta_key, vec![("public".to_string(), "true".to_string())])?;

    Ok(id)
}

// Check existence of a db id in admin:dbs
pub fn db_exists(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
) -> Result<bool, DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    Ok(admin.hexists(k_admin_dbs(), &id.to_string())?)
}

// Get the per-db encryption key, if any
pub fn get_enc_key(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
) -> Result<Option<String>, DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    admin.get(&k_meta_db_enc(id))
}

// Set the per-db encryption key (called during create)
pub fn set_enc_key(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
    key: &str,
) -> Result<(), DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    admin.set(k_meta_db_enc(id), key.to_string())
}

// Set the database public flag
pub fn set_database_public(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
    public: bool,
) -> Result<(), DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let mk = k_meta_db(id);
    let _ = admin.hset(&mk, vec![("public".to_string(), public.to_string())])?;
    Ok(())
}

// Persist the per-db backend type in admin metadata (module scope)
pub fn set_database_backend(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
    db_backend: options::BackendType,
) -> Result<(), DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let mk = k_meta_db(id);
    let val = match db_backend {
        options::BackendType::Redb => "Redb",
        options::BackendType::Sled => "Sled",
        options::BackendType::Tantivy => "Tantivy",
    };
    let _ = admin.hset(&mk, vec![("backend".to_string(), val.to_string())])?;
    Ok(())
}

pub fn get_database_backend(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
) -> Result<Option<options::BackendType>, DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let mk = k_meta_db(id);
    match admin.hget(&mk, "backend")? {
        Some(s) if s == "Redb" => Ok(Some(options::BackendType::Redb)),
        Some(s) if s == "Sled" => Ok(Some(options::BackendType::Sled)),
        Some(s) if s == "Tantivy" => Ok(Some(options::BackendType::Tantivy)),
        _ => Ok(None),
    }
}

// Set the database name
pub fn set_database_name(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
    name: &str,
) -> Result<(), DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let mk = k_meta_db(id);
    let _ = admin.hset(&mk, vec![("name".to_string(), name.to_string())])?;
    Ok(())
}

// Get the database name
pub fn get_database_name(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
) -> Result<Option<String>, DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let mk = k_meta_db(id);
    admin.hget(&mk, "name")
}

// Internal: load the public flag; defaults to true when meta is missing
fn load_public(
    admin: &Arc<dyn StorageBackend>,
    id: u64,
) -> Result<bool, DBError> {
    let mk = k_meta_db(id);
    match admin.hget(&mk, "public")? {
        Some(v) => Ok(v == "true"),
        None => Ok(true),
    }
}

// Add an access key for a db (value format: "Read:ts" or "ReadWrite:ts")
pub fn add_access_key(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
    key_plain: &str,
    perms: Permissions,
) -> Result<(), DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let hash = crate::rpc::hash_key(key_plain);
    let v = match perms {
        Permissions::Read => format!("Read:{}", now_secs()),
        Permissions::ReadWrite => format!("ReadWrite:{}", now_secs()),
    };
    let _ = admin.hset(&k_meta_db_keys(id), vec![(hash, v)])?;
    Ok(())
}

// Delete an access key by hash
pub fn delete_access_key(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
    key_hash: &str,
) -> Result<bool, DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let n = admin.hdel(&k_meta_db_keys(id), vec![key_hash.to_string()])?;
    Ok(n > 0)
}

// List access keys, returning (hash, perms, created_at_secs)
pub fn list_access_keys(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
) -> Result<Vec<(String, Permissions, u64)>, DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let pairs = admin.hgetall(&k_meta_db_keys(id))?;
    let mut out = Vec::new();
    for (hash, val) in pairs {
        let (perm, ts) = parse_perm_value(&val);
        out.push((hash, perm, ts));
    }
    Ok(out)
}

// Verify access permission for a db id with an optional key.
// Returns:
// - Ok(Some(Permissions)) when access is allowed
// - Ok(None) when not allowed or the db is missing (the caller can distinguish by calling db_exists)
pub fn verify_access(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
    id: u64,
    key_opt: Option<&str>,
) -> Result<Option<Permissions>, DBError> {
    // Admin DB 0: require the exact admin_secret
    if id == 0 {
        if let Some(k) = key_opt {
            if k == admin_secret {
                return Ok(Some(Permissions::ReadWrite));
            }
        }
        return Ok(None);
    }

    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    if !admin.hexists(k_admin_dbs(), &id.to_string())? {
        return Ok(None);
    }

    // Public?
    if load_public(&admin, id)? {
        return Ok(Some(Permissions::ReadWrite));
    }

    // Private: require a key and verify it
    if let Some(k) = key_opt {
        let hash = crate::rpc::hash_key(k);
        if let Some(v) = admin.hget(&k_meta_db_keys(id), &hash)? {
            let (perm, _ts) = parse_perm_value(&v);
            return Ok(Some(perm));
        }
    }
    Ok(None)
}

// Enumerate all db ids
pub fn list_dbs(
    base_dir: &str,
    backend: options::BackendType,
    admin_secret: &str,
) -> Result<Vec<u64>, DBError> {
    let admin = open_admin_storage(base_dir, backend, admin_secret)?;
    let ids = admin.hkeys(k_admin_dbs())?;
    let mut out = Vec::new();
    for s in ids {
        if let Ok(v) = s.parse() {
            out.push(v);
        }
    }
    Ok(out)
}

// Helper: parse a permission value "Read:ts" or "ReadWrite:ts"
fn parse_perm_value(v: &str) -> (Permissions, u64) {
    let mut parts = v.split(':');
    let p = parts.next().unwrap_or("Read");
    let ts = parts
        .next()
        .and_then(|s| s.parse().ok())
        .unwrap_or(0u64);
    let perm = match p {
        "ReadWrite" => Permissions::ReadWrite,
        _ => Permissions::Read,
    };
    (perm, ts)
}

fn now_secs() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs()
}
290
src/age.rs
290
src/age.rs
@@ -19,6 +19,8 @@ use age::x25519;
|
||||
use ed25519_dalek::{Signature, Signer, Verifier, SigningKey, VerifyingKey};
|
||||
|
||||
use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
|
||||
use std::collections::HashSet;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::protocol::Protocol;
|
||||
use crate::server::Server;
|
||||
@@ -74,6 +76,125 @@ fn parse_ed25519_verifying_key(s: &str) -> Result<VerifyingKey, AgeWireError> {
|
||||
VerifyingKey::from_bytes(&key_bytes).map_err(|_| AgeWireError::ParseKey)
|
||||
}
|
||||
|
||||
// ---------- Derivation + Raw X25519 (Ed25519 -> X25519) ----------
|
||||
//
|
||||
// We deterministically derive an X25519 keypair from an Ed25519 SigningKey.
|
||||
// We persist the X25519 public/secret as base64-encoded 32-byte raw values
|
||||
// (no "age1..."/"AGE-SECRET-KEY-1..." formatting). Name-based encrypt/decrypt
|
||||
// uses these raw values directly via x25519-dalek + ChaCha20Poly1305.
|
||||
|
||||
use chacha20poly1305::{aead::{Aead, KeyInit}, ChaCha20Poly1305, Key, Nonce};
|
||||
use sha2::{Digest, Sha256};
|
||||
use x25519_dalek::{PublicKey as XPublicKey, StaticSecret as XStaticSecret};
|
||||
|
||||
fn derive_x25519_raw_from_ed25519(sk: &SigningKey) -> ([u8; 32], [u8; 32]) {
|
||||
// X25519 secret scalar (clamped) from Ed25519 secret
|
||||
let scalar: [u8; 32] = sk.to_scalar_bytes();
|
||||
// Build X25519 secret/public using dalek
|
||||
let xsec = XStaticSecret::from(scalar);
|
||||
let xpub = XPublicKey::from(&xsec);
|
||||
(xpub.to_bytes(), xsec.to_bytes())
|
||||
}
|
||||
|
||||
fn derive_x25519_raw_b64_from_ed25519(sk: &SigningKey) -> (String, String) {
|
||||
let (xpub, xsec) = derive_x25519_raw_from_ed25519(sk);
|
||||
(B64.encode(xpub), B64.encode(xsec))
|
||||
}
|
||||
|
||||
// Helper: detect whether a stored key looks like an age-formatted string
|
||||
fn looks_like_age_format(s: &str) -> bool {
|
||||
s.starts_with("age1") || s.starts_with("AGE-SECRET-KEY-1")
|
||||
}
|
||||
|
||||
// Our container format for name-based raw X25519 encryption:
|
||||
// bytes = "HDBX1" (5) || eph_pub(32) || nonce(12) || ciphertext(..)
|
||||
// Entire blob is base64-encoded for transport.
|
||||
const HDBX1_MAGIC: &[u8; 5] = b"HDBX1";
|
||||
|
||||
fn encrypt_b64_with_x25519_raw(recip_pub_b64: &str, msg: &str) -> Result<String, AgeWireError> {
|
||||
use rand::RngCore;
|
||||
use rand::rngs::OsRng;
|
||||
|
||||
// Parse recipient public key (raw 32 bytes, base64)
|
||||
let recip_pub_bytes = B64.decode(recip_pub_b64).map_err(|_| AgeWireError::ParseKey)?;
|
||||
if recip_pub_bytes.len() != 32 { return Err(AgeWireError::ParseKey); }
|
||||
let recip_pub_arr: [u8; 32] = recip_pub_bytes.as_slice().try_into().map_err(|_| AgeWireError::ParseKey)?;
|
||||
let recip_pub: XPublicKey = XPublicKey::from(recip_pub_arr);
|
||||
|
||||
// Generate ephemeral X25519 keypair
|
||||
let mut eph_sec_bytes = [0u8; 32];
|
||||
OsRng.fill_bytes(&mut eph_sec_bytes);
|
||||
let eph_sec = XStaticSecret::from(eph_sec_bytes);
|
||||
let eph_pub = XPublicKey::from(&eph_sec);
|
||||
|
||||
// ECDH
|
||||
let shared = eph_sec.diffie_hellman(&recip_pub);
|
||||
// Derive symmetric key via SHA-256 over context + shared + parties
|
||||
let mut hasher = Sha256::default();
|
||||
hasher.update(b"herodb-x25519-v1");
|
||||
hasher.update(shared.as_bytes());
|
||||
hasher.update(eph_pub.as_bytes());
|
||||
hasher.update(recip_pub.as_bytes());
|
||||
let key_bytes = hasher.finalize();
|
||||
let key = Key::from_slice(&key_bytes[..32]);
|
||||
|
||||
// Nonce (12 bytes)
|
||||
let mut nonce_bytes = [0u8; 12];
|
||||
OsRng.fill_bytes(&mut nonce_bytes);
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
// Encrypt
|
||||
let cipher = ChaCha20Poly1305::new(key);
|
||||
let ct = cipher.encrypt(nonce, msg.as_bytes())
|
||||
.map_err(|e| AgeWireError::Crypto(format!("encrypt: {e}")))?;
|
||||
|
||||
// Assemble container
|
||||
let mut out = Vec::with_capacity(5 + 32 + 12 + ct.len());
|
||||
out.extend_from_slice(HDBX1_MAGIC);
|
||||
out.extend_from_slice(eph_pub.as_bytes());
|
||||
out.extend_from_slice(&nonce_bytes);
|
||||
out.extend_from_slice(&ct);
|
||||
|
||||
Ok(B64.encode(out))
|
||||
}
|
||||
|
||||
fn decrypt_b64_with_x25519_raw(identity_sec_b64: &str, ct_b64: &str) -> Result<String, AgeWireError> {
|
||||
// Parse X25519 secret (raw 32 bytes, base64)
|
||||
let sec_bytes = B64.decode(identity_sec_b64).map_err(|_| AgeWireError::ParseKey)?;
|
||||
if sec_bytes.len() != 32 { return Err(AgeWireError::ParseKey); }
|
||||
let sec_arr: [u8; 32] = sec_bytes.as_slice().try_into().map_err(|_| AgeWireError::ParseKey)?;
|
||||
let xsec = XStaticSecret::from(sec_arr);
|
||||
let xpub = XPublicKey::from(&xsec); // self public
|
||||
|
||||
// Decode container
|
||||
let blob = B64.decode(ct_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
|
||||
if blob.len() < 5 + 32 + 12 { return Err(AgeWireError::Crypto("ciphertext too short".to_string())); }
|
||||
if &blob[..5] != HDBX1_MAGIC { return Err(AgeWireError::Crypto("bad header".to_string())); }
|
||||
|
||||
let eph_pub_arr: [u8; 32] = blob[5..5+32].try_into().map_err(|_| AgeWireError::Crypto("bad eph pub".to_string()))?;
|
||||
let eph_pub = XPublicKey::from(eph_pub_arr);
|
||||
let nonce_bytes: [u8; 12] = blob[5+32..5+32+12].try_into().unwrap();
|
||||
let ct = &blob[5+32+12..];
|
||||
|
||||
// Recompute shared + key
|
||||
let shared = xsec.diffie_hellman(&eph_pub);
|
||||
let mut hasher = Sha256::default();
|
||||
hasher.update(b"herodb-x25519-v1");
|
||||
hasher.update(shared.as_bytes());
|
||||
hasher.update(eph_pub.as_bytes());
|
||||
hasher.update(xpub.as_bytes());
|
||||
let key_bytes = hasher.finalize();
|
||||
let key = Key::from_slice(&key_bytes[..32]);
|
||||
|
||||
// Decrypt
|
||||
let cipher = ChaCha20Poly1305::new(key);
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
let pt = cipher.decrypt(nonce, ct)
|
||||
.map_err(|e| AgeWireError::Crypto(format!("decrypt: {e}")))?;
|
||||
|
||||
String::from_utf8(pt).map_err(|_| AgeWireError::Utf8)
|
||||
}
|
||||
|
||||
// ---------- Stateless crypto helpers (string in/out) ----------
|
||||
|
||||
pub fn gen_enc_keypair() -> (String, String) {
|
||||
@@ -210,13 +331,72 @@ pub async fn cmd_age_verify(verify_pub: &str, message: &str, sig_b64: &str) -> P
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- NEW: unified stateless generator (Ed25519 + derived X25519 raw) ----------
|
||||
//
|
||||
// Returns 4-tuple:
|
||||
// [ verify_pub_b64 (32B), signpriv_b64 (32B), x25519_pub_b64 (32B), x25519_sec_b64 (32B) ]
|
||||
// No persistence (stateless).
|
||||
pub async fn cmd_age_genkey() -> Protocol {
|
||||
use rand::RngCore;
|
||||
use rand::rngs::OsRng;
|
||||
|
||||
let mut secret_bytes = [0u8; 32];
|
||||
OsRng.fill_bytes(&mut secret_bytes);
|
||||
|
||||
let signing_key = SigningKey::from_bytes(&secret_bytes);
|
||||
let verifying_key = signing_key.verifying_key();
|
||||
|
||||
let verify_b64 = B64.encode(verifying_key.to_bytes());
|
||||
let sign_b64 = B64.encode(signing_key.to_bytes());
|
||||
|
||||
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&signing_key);
|
||||
|
||||
Protocol::Array(vec![
|
||||
Protocol::BulkString(verify_b64),
|
||||
Protocol::BulkString(sign_b64),
|
||||
Protocol::BulkString(xpub_b64),
|
||||
Protocol::BulkString(xsec_b64),
|
||||
])
|
||||
}
|
||||
|
||||
// ---------- NEW: Persistent, named-key commands ----------
|
||||
|
||||
pub async fn cmd_age_keygen(server: &Server, name: &str) -> Protocol {
|
||||
let (recip, ident) = gen_enc_keypair();
|
||||
if let Err(e) = sset(server, &enc_pub_key_key(name), &recip) { return e.to_protocol(); }
|
||||
if let Err(e) = sset(server, &enc_priv_key_key(name), &ident) { return e.to_protocol(); }
|
||||
Protocol::Array(vec![Protocol::BulkString(recip), Protocol::BulkString(ident)])
|
||||
use rand::RngCore;
|
||||
use rand::rngs::OsRng;
|
||||
|
||||
// Generate Ed25519 keypair
|
||||
let mut secret_bytes = [0u8; 32];
|
||||
OsRng.fill_bytes(&mut secret_bytes);
|
||||
let signing_key = SigningKey::from_bytes(&secret_bytes);
|
||||
let verifying_key = signing_key.verifying_key();
|
||||
|
||||
// Encode Ed25519 as base64 (32 bytes)
|
||||
let verify_b64 = B64.encode(verifying_key.to_bytes());
|
||||
let sign_b64 = B64.encode(signing_key.to_bytes());
|
||||
|
||||
// Derive X25519 raw (32-byte) keys and encode as base64
|
||||
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&signing_key);
|
||||
|
||||
// Decode to create age-formatted strings
|
||||
let xpub_bytes = B64.decode(&xpub_b64).unwrap();
|
||||
let xsec_bytes = B64.decode(&xsec_b64).unwrap();
|
||||
let xpub_arr: [u8; 32] = xpub_bytes.as_slice().try_into().unwrap();
|
||||
let xsec_arr: [u8; 32] = xsec_bytes.as_slice().try_into().unwrap();
|
||||
let recip_str = format!("age1{}", B64.encode(xpub_arr));
|
||||
let ident_str = format!("AGE-SECRET-KEY-1{}", B64.encode(xsec_arr));
|
||||
|
||||
// Persist Ed25519 and derived X25519 (key-managed mode)
|
||||
if let Err(e) = sset(server, &sign_pub_key_key(name), &verify_b64) { return e.to_protocol(); }
|
||||
if let Err(e) = sset(server, &sign_priv_key_key(name), &sign_b64) { return e.to_protocol(); }
|
||||
if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); }
|
||||
if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); }
|
||||
|
||||
// Return [recipient, identity] in age format
|
||||
Protocol::Array(vec![
|
||||
Protocol::BulkString(recip_str),
|
||||
Protocol::BulkString(ident_str),
|
||||
])
|
||||
}
|
||||
|
||||
pub async fn cmd_age_signkeygen(server: &Server, name: &str) -> Protocol {
|
||||
@@ -227,26 +407,76 @@ pub async fn cmd_age_signkeygen(server: &Server, name: &str) -> Protocol {
|
||||
}
|
||||
|
||||
pub async fn cmd_age_encrypt_name(server: &Server, name: &str, message: &str) -> Protocol {
|
||||
let recip = match sget(server, &enc_pub_key_key(name)) {
|
||||
// Load stored recipient (could be raw b64 32-byte or "age1..." from legacy)
|
||||
let recip_or_b64 = match sget(server, &enc_pub_key_key(name)) {
|
||||
Ok(Some(v)) => v,
|
||||
Ok(None) => return AgeWireError::NotFound("recipient (age:key:{name})").to_protocol(),
|
||||
Ok(None) => {
|
||||
// Derive from stored Ed25519 if present, then persist
|
||||
match sget(server, &sign_priv_key_key(name)) {
|
||||
Ok(Some(sign_b64)) => {
|
||||
let sk = match parse_ed25519_signing_key(&sign_b64) {
|
||||
Ok(k) => k,
|
||||
Err(e) => return e.to_protocol(),
|
||||
};
|
||||
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&sk);
|
||||
if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); }
|
||||
if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); }
|
||||
xpub_b64
|
||||
}
|
||||
Ok(None) => return AgeWireError::NotFound("recipient (age:key:{name})").to_protocol(),
|
||||
Err(e) => return e.to_protocol(),
|
||||
}
|
||||
}
|
||||
Err(e) => return e.to_protocol(),
|
||||
};
|
||||
match encrypt_b64(&recip, message) {
|
||||
Ok(ct) => Protocol::BulkString(ct),
|
||||
Err(e) => e.to_protocol(),
|
||||
|
||||
if looks_like_age_format(&recip_or_b64) {
|
||||
match encrypt_b64(&recip_or_b64, message) {
|
||||
Ok(ct) => Protocol::BulkString(ct),
|
||||
Err(e) => e.to_protocol(),
|
||||
}
|
||||
} else {
|
||||
match encrypt_b64_with_x25519_raw(&recip_or_b64, message) {
|
||||
Ok(ct) => Protocol::BulkString(ct),
|
||||
Err(e) => e.to_protocol(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn cmd_age_decrypt_name(server: &Server, name: &str, ct_b64: &str) -> Protocol {
|
||||
let ident = match sget(server, &enc_priv_key_key(name)) {
|
||||
// Load stored identity (could be raw b64 32-byte or "AGE-SECRET-KEY-1..." from legacy)
|
||||
let ident_or_b64 = match sget(server, &enc_priv_key_key(name)) {
|
||||
Ok(Some(v)) => v,
|
||||
Ok(None) => return AgeWireError::NotFound("identity (age:privkey:{name})").to_protocol(),
|
||||
Ok(None) => {
|
||||
// Derive from stored Ed25519 if present, then persist
|
||||
match sget(server, &sign_priv_key_key(name)) {
|
||||
Ok(Some(sign_b64)) => {
|
||||
let sk = match parse_ed25519_signing_key(&sign_b64) {
|
||||
Ok(k) => k,
|
||||
Err(e) => return e.to_protocol(),
|
||||
};
|
||||
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&sk);
|
||||
if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); }
|
||||
if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); }
|
||||
xsec_b64
|
||||
}
|
||||
Ok(None) => return AgeWireError::NotFound("identity (age:privkey:{name})").to_protocol(),
|
||||
Err(e) => return e.to_protocol(),
|
||||
}
|
||||
}
|
||||
Err(e) => return e.to_protocol(),
|
||||
};
|
||||
match decrypt_b64(&ident, ct_b64) {
|
||||
Ok(pt) => Protocol::BulkString(pt),
|
||||
Err(e) => e.to_protocol(),
|
||||
|
||||
if looks_like_age_format(&ident_or_b64) {
|
||||
match decrypt_b64(&ident_or_b64, ct_b64) {
|
||||
Ok(pt) => Protocol::BulkString(pt),
|
||||
Err(e) => e.to_protocol(),
|
||||
}
|
||||
} else {
|
||||
match decrypt_b64_with_x25519_raw(&ident_or_b64, ct_b64) {
|
||||
Ok(pt) => Protocol::BulkString(pt),
|
||||
Err(e) => e.to_protocol(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -276,33 +506,31 @@ pub async fn cmd_age_verify_name(server: &Server, name: &str, message: &str, sig
|
||||
}
|
||||
|
||||
pub async fn cmd_age_list(server: &Server) -> Protocol {
    // Returns 4 arrays: ["encpub", <names...>], ["encpriv", ...], ["signpub", ...], ["signpriv", ...]
    // Return a flat, deduplicated, sorted list of managed key names (no labels)
    let st = match server.current_storage() { Ok(s) => s, Err(e) => return Protocol::err(&e.0) };

    let pull = |pat: &str, prefix: &str| -> Result<Vec<String>, DBError> {
        let keys = st.keys(pat)?;
        let mut names: Vec<String> = keys.into_iter()
        let mut names: Vec<String> = keys
            .into_iter()
            .filter_map(|k| k.strip_prefix(prefix).map(|x| x.to_string()))
            .collect();
        names.sort();
        Ok(names)
    };

    let encpub = match pull("age:key:*", "age:key:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
    let encpriv = match pull("age:privkey:*", "age:privkey:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
    let signpub = match pull("age:signpub:*", "age:signpub:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
    let signpriv= match pull("age:signpriv:*", "age:signpriv:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
    let encpub = match pull("age:key:*", "age:key:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
    let encpriv = match pull("age:privkey:*", "age:privkey:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
    let signpub = match pull("age:signpub:*", "age:signpub:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
    let signpriv = match pull("age:signpriv:*", "age:signpriv:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };

    let to_arr = |label: &str, v: Vec<String>| {
        let mut out = vec![Protocol::BulkString(label.to_string())];
        out.push(Protocol::Array(v.into_iter().map(Protocol::BulkString).collect()));
        Protocol::Array(out)
    };
    let mut set: HashSet<String> = HashSet::new();
    for n in encpub.into_iter().chain(encpriv).chain(signpub).chain(signpriv) {
        set.insert(n);
    }

    Protocol::Array(vec![
        to_arr("encpub", encpub),
        to_arr("encpriv", encpriv),
        to_arr("signpub", signpub),
        to_arr("signpriv", signpriv),
    ])
    let mut names: Vec<String> = set.into_iter().collect();
    names.sort();

    Protocol::Array(names.into_iter().map(Protocol::BulkString).collect())
}
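Note on the AGE LIST change above: the reply collapses the four labeled arrays into one flat, sorted, deduplicated list of key names. A minimal client-side sketch using the `redis` dev-dependency; the server address and the exact `AGE LIST` command spelling are assumptions based on the handler name.

```rust
// Sketch: read the flattened AGE LIST reply (names only, sorted, deduplicated).
fn list_age_key_names() -> redis::RedisResult<Vec<String>> {
    let client = redis::Client::open("redis://127.0.0.1:6379")?;
    let mut con = client.get_connection()?;
    // Old reply: four labeled arrays; new reply: one flat array of names.
    redis::cmd("AGE").arg("LIST").query(&mut con)
}
```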
939
src/cmd.rs
File diff suppressed because it is too large
43
src/error.rs
@@ -9,12 +9,6 @@ use bincode;
#[derive(Debug)]
pub struct DBError(pub String);

impl std::fmt::Display for DBError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl From<std::io::Error> for DBError {
    fn from(item: std::io::Error) -> Self {
        DBError(item.to_string().clone())
@@ -98,40 +92,3 @@ impl From<chacha20poly1305::Error> for DBError {
        DBError(item.to_string())
    }
}

// Lance and related dependencies error handling
impl From<lance::Error> for DBError {
    fn from(item: lance::Error) -> Self {
        DBError(item.to_string())
    }
}

impl From<arrow::error::ArrowError> for DBError {
    fn from(item: arrow::error::ArrowError) -> Self {
        DBError(item.to_string())
    }
}

impl From<reqwest::Error> for DBError {
    fn from(item: reqwest::Error) -> Self {
        DBError(item.to_string())
    }
}

impl From<image::ImageError> for DBError {
    fn from(item: image::ImageError) -> Self {
        DBError(item.to_string())
    }
}

impl From<uuid::Error> for DBError {
    fn from(item: uuid::Error) -> Self {
        DBError(item.to_string())
    }
}

impl From<base64::DecodeError> for DBError {
    fn from(item: base64::DecodeError) -> Self {
        DBError(item.to_string())
    }
}
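The practical effect of the remaining `From` impls is that `?` converts foreign errors into `DBError` automatically; the removed Lance/Arrow/reqwest/image/uuid conversions disappear along with the vector backend. A minimal sketch (the function and path are illustrative only):

```rust
use herodb::error::DBError;

// The From<std::io::Error> impl above lets `?` do the conversion for us.
fn read_dump(path: &str) -> Result<String, DBError> {
    let contents = std::fs::read_to_string(path)?; // io::Error -> DBError
    Ok(contents)
}
```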
609
src/lance_store.rs
@@ -1,609 +0,0 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;

use arrow::array::{Float32Array, StringArray, ArrayRef, FixedSizeListArray, Array};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use arrow::record_batch::{RecordBatch, RecordBatchReader};
use arrow::error::ArrowError;
use lance::dataset::{Dataset, WriteParams, WriteMode};
use lance::index::vector::VectorIndexParams;
use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::ivf::IvfBuildParams;
use lance_index::DatasetIndexExt;
use lance_linalg::distance::MetricType;
use futures::TryStreamExt;
use base64::Engine;

use serde::{Deserialize, Serialize};
use crate::error::DBError;

// Simple RecordBatchReader implementation for Vec<RecordBatch>
struct VecRecordBatchReader {
    batches: std::vec::IntoIter<Result<RecordBatch, ArrowError>>,
}

impl VecRecordBatchReader {
    fn new(batches: Vec<RecordBatch>) -> Self {
        let result_batches = batches.into_iter().map(Ok).collect::<Vec<_>>();
        Self {
            batches: result_batches.into_iter(),
        }
    }
}

impl Iterator for VecRecordBatchReader {
    type Item = Result<RecordBatch, ArrowError>;

    fn next(&mut self) -> Option<Self::Item> {
        self.batches.next()
    }
}

impl RecordBatchReader for VecRecordBatchReader {
    fn schema(&self) -> SchemaRef {
        // This is a simplified implementation - in practice you'd want to store the schema
        Arc::new(Schema::empty())
    }
}

#[derive(Debug, Serialize, Deserialize)]
struct EmbeddingRequest {
    texts: Option<Vec<String>>,
    images: Option<Vec<String>>, // base64 encoded
    model: Option<String>,
}

#[derive(Debug, Serialize, Deserialize)]
struct EmbeddingResponse {
    embeddings: Vec<Vec<f32>>,
    model: String,
    usage: Option<HashMap<String, u32>>,
}

// Ollama-specific request/response structures
#[derive(Debug, Serialize, Deserialize)]
struct OllamaEmbeddingRequest {
    model: String,
    prompt: String,
}

#[derive(Debug, Serialize, Deserialize)]
struct OllamaEmbeddingResponse {
    embedding: Vec<f32>,
}

pub struct LanceStore {
    datasets: Arc<RwLock<HashMap<String, Arc<Dataset>>>>,
    data_dir: PathBuf,
    http_client: reqwest::Client,
}

impl LanceStore {
    pub async fn new(data_dir: PathBuf) -> Result<Self, DBError> {
        // Create data directory if it doesn't exist
        std::fs::create_dir_all(&data_dir)
            .map_err(|e| DBError(format!("Failed to create Lance data directory: {}", e)))?;

        let http_client = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(30))
            .build()
            .map_err(|e| DBError(format!("Failed to create HTTP client: {}", e)))?;

        Ok(Self {
            datasets: Arc::new(RwLock::new(HashMap::new())),
            data_dir,
            http_client,
        })
    }

    /// Get embedding service URL from Redis config, default to local Ollama
    async fn get_embedding_url(&self, server: &crate::server::Server) -> Result<String, DBError> {
        // Get the embedding URL from Redis config directly from storage
        let storage = server.current_storage()?;
        match storage.hget("config:core:aiembed", "url")? {
            Some(url) => Ok(url),
            None => Ok("http://localhost:11434".to_string()), // Default to local Ollama
        }
    }

    /// Check if we're using Ollama (default) or custom embedding service
    async fn is_ollama_service(&self, server: &crate::server::Server) -> Result<bool, DBError> {
        let url = self.get_embedding_url(server).await?;
        Ok(url.contains("localhost:11434") || url.contains("127.0.0.1:11434"))
    }

    /// Call external embedding service (Ollama or custom)
    async fn call_embedding_service(
        &self,
        server: &crate::server::Server,
        texts: Option<Vec<String>>,
        images: Option<Vec<String>>,
    ) -> Result<Vec<Vec<f32>>, DBError> {
        let base_url = self.get_embedding_url(server).await?;
        let is_ollama = self.is_ollama_service(server).await?;

        if is_ollama {
            // Use Ollama API format
            if let Some(texts) = texts {
                let mut embeddings = Vec::new();
                for text in texts {
                    let url = format!("{}/api/embeddings", base_url);
                    let request = OllamaEmbeddingRequest {
                        model: "nomic-embed-text".to_string(),
                        prompt: text,
                    };

                    let response = self.http_client
                        .post(&url)
                        .json(&request)
                        .send()
                        .await
                        .map_err(|e| DBError(format!("Failed to call Ollama embedding service: {}", e)))?;

                    if !response.status().is_success() {
                        let status = response.status();
                        let error_text = response.text().await.unwrap_or_default();
                        return Err(DBError(format!(
                            "Ollama embedding service returned error {}: {}",
                            status, error_text
                        )));
                    }

                    let ollama_response: OllamaEmbeddingResponse = response
                        .json()
                        .await
                        .map_err(|e| DBError(format!("Failed to parse Ollama embedding response: {}", e)))?;

                    embeddings.push(ollama_response.embedding);
                }
                Ok(embeddings)
            } else if let Some(_images) = images {
                // Ollama doesn't support image embeddings with this API yet
                Err(DBError("Image embeddings not supported with Ollama. Please configure a custom embedding service.".to_string()))
            } else {
                Err(DBError("No text or images provided for embedding".to_string()))
            }
        } else {
            // Use custom embedding service API format
            let request = EmbeddingRequest {
                texts,
                images,
                model: None, // Let the service use its default
            };

            let response = self.http_client
                .post(&base_url)
                .json(&request)
                .send()
                .await
                .map_err(|e| DBError(format!("Failed to call embedding service: {}", e)))?;

            if !response.status().is_success() {
                let status = response.status();
                let error_text = response.text().await.unwrap_or_default();
                return Err(DBError(format!(
                    "Embedding service returned error {}: {}",
                    status, error_text
                )));
            }

            let embedding_response: EmbeddingResponse = response
                .json()
                .await
                .map_err(|e| DBError(format!("Failed to parse embedding response: {}", e)))?;

            Ok(embedding_response.embeddings)
        }
    }

    pub async fn embed_text(
        &self,
        server: &crate::server::Server,
        texts: Vec<String>
    ) -> Result<Vec<Vec<f32>>, DBError> {
        if texts.is_empty() {
            return Ok(Vec::new());
        }

        self.call_embedding_service(server, Some(texts), None).await
    }

    pub async fn embed_image(
        &self,
        server: &crate::server::Server,
        image_bytes: Vec<u8>
    ) -> Result<Vec<f32>, DBError> {
        // Convert image bytes to base64
        let base64_image = base64::engine::general_purpose::STANDARD.encode(&image_bytes);

        let embeddings = self.call_embedding_service(
            server,
            None,
            Some(vec![base64_image])
        ).await?;

        embeddings.into_iter()
            .next()
            .ok_or_else(|| DBError("No embedding returned for image".to_string()))
    }

    pub async fn create_dataset(
        &self,
        name: &str,
        schema: Schema,
    ) -> Result<(), DBError> {
        let dataset_path = self.data_dir.join(format!("{}.lance", name));

        // Create empty dataset with schema
        let write_params = WriteParams {
            mode: WriteMode::Create,
            ..Default::default()
        };

        // Create an empty RecordBatch with the schema
        let empty_batch = RecordBatch::new_empty(Arc::new(schema));

        // Use RecordBatchReader for Lance 0.33
        let reader = VecRecordBatchReader::new(vec![empty_batch]);
        let dataset = Dataset::write(
            reader,
            dataset_path.to_str().unwrap(),
            Some(write_params)
        ).await
        .map_err(|e| DBError(format!("Failed to create dataset: {}", e)))?;

        let mut datasets = self.datasets.write().await;
        datasets.insert(name.to_string(), Arc::new(dataset));

        Ok(())
    }

    pub async fn write_vectors(
        &self,
        dataset_name: &str,
        vectors: Vec<Vec<f32>>,
        metadata: Option<HashMap<String, Vec<String>>>,
    ) -> Result<usize, DBError> {
        let dataset_path = self.data_dir.join(format!("{}.lance", dataset_name));

        // Open or get cached dataset
        let _dataset = self.get_or_open_dataset(dataset_name).await?;

        // Build RecordBatch
        let num_vectors = vectors.len();
        if num_vectors == 0 {
            return Ok(0);
        }

        let dim = vectors.first()
            .ok_or_else(|| DBError("Empty vectors".to_string()))?
            .len();

        // Flatten vectors
        let flat_vectors: Vec<f32> = vectors.into_iter().flatten().collect();
        let values_array = Float32Array::from(flat_vectors);
        let field = Arc::new(Field::new("item", DataType::Float32, true));
        let vector_array = FixedSizeListArray::try_new(
            field,
            dim as i32,
            Arc::new(values_array),
            None
        ).map_err(|e| DBError(format!("Failed to create vector array: {}", e)))?;

        let mut arrays: Vec<ArrayRef> = vec![Arc::new(vector_array)];
        let mut fields = vec![Field::new(
            "vector",
            DataType::FixedSizeList(
                Arc::new(Field::new("item", DataType::Float32, true)),
                dim as i32
            ),
            false
        )];

        // Add metadata columns if provided
        if let Some(metadata) = metadata {
            for (key, values) in metadata {
                if values.len() != num_vectors {
                    return Err(DBError(format!(
                        "Metadata field '{}' has {} values but expected {}",
                        key, values.len(), num_vectors
                    )));
                }
                let array = StringArray::from(values);
                arrays.push(Arc::new(array));
                fields.push(Field::new(&key, DataType::Utf8, true));
            }
        }

        let schema = Arc::new(Schema::new(fields));
        let batch = RecordBatch::try_new(schema, arrays)
            .map_err(|e| DBError(format!("Failed to create RecordBatch: {}", e)))?;

        // Append to dataset
        let write_params = WriteParams {
            mode: WriteMode::Append,
            ..Default::default()
        };

        let reader = VecRecordBatchReader::new(vec![batch]);
        Dataset::write(
            reader,
            dataset_path.to_str().unwrap(),
            Some(write_params)
        ).await
        .map_err(|e| DBError(format!("Failed to write to dataset: {}", e)))?;

        // Refresh cached dataset
        let mut datasets = self.datasets.write().await;
        datasets.remove(dataset_name);

        Ok(num_vectors)
    }

    pub async fn search_vectors(
        &self,
        dataset_name: &str,
        query_vector: Vec<f32>,
        k: usize,
        nprobes: Option<usize>,
        _refine_factor: Option<usize>,
    ) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
        let dataset = self.get_or_open_dataset(dataset_name).await?;

        // Build query
        let query_array = Float32Array::from(query_vector.clone());
        let mut query = dataset.scan();
        query.nearest(
            "vector",
            &query_array,
            k,
        ).map_err(|e| DBError(format!("Failed to build search query: {}", e)))?;

        if let Some(nprobes) = nprobes {
            query.nprobs(nprobes);
        }

        // Note: refine_factor might not be available in this Lance version
        // if let Some(refine) = refine_factor {
        //     query.refine_factor(refine);
        // }

        // Execute search
        let results = query
            .try_into_stream()
            .await
            .map_err(|e| DBError(format!("Failed to execute search: {}", e)))?
            .try_collect::<Vec<_>>()
            .await
            .map_err(|e| DBError(format!("Failed to collect results: {}", e)))?;

        // Process results
        let mut output = Vec::new();
        for batch in results {
            // Get distances
            let distances = batch
                .column_by_name("_distance")
                .ok_or_else(|| DBError("No distance column".to_string()))?
                .as_any()
                .downcast_ref::<Float32Array>()
                .ok_or_else(|| DBError("Invalid distance type".to_string()))?;

            // Get metadata
            for i in 0..batch.num_rows() {
                let distance = distances.value(i);
                let mut metadata = HashMap::new();

                for field in batch.schema().fields() {
                    if field.name() != "vector" && field.name() != "_distance" {
                        if let Some(col) = batch.column_by_name(field.name()) {
                            if let Some(str_array) = col.as_any().downcast_ref::<StringArray>() {
                                if !str_array.is_null(i) {
                                    metadata.insert(
                                        field.name().to_string(),
                                        str_array.value(i).to_string()
                                    );
                                }
                            }
                        }
                    }
                }

                output.push((distance, metadata));
            }
        }

        Ok(output)
    }

    pub async fn store_multimodal(
        &self,
        server: &crate::server::Server,
        dataset_name: &str,
        text: Option<String>,
        image_bytes: Option<Vec<u8>>,
        metadata: HashMap<String, String>,
    ) -> Result<String, DBError> {
        // Generate ID
        let id = uuid::Uuid::new_v4().to_string();

        // Generate embeddings using external service
        let embedding = if let Some(text) = text.as_ref() {
            self.embed_text(server, vec![text.clone()]).await?
                .into_iter()
                .next()
                .ok_or_else(|| DBError("No embedding returned".to_string()))?
        } else if let Some(img) = image_bytes.as_ref() {
            self.embed_image(server, img.clone()).await?
        } else {
            return Err(DBError("No text or image provided".to_string()));
        };

        // Prepare metadata
        let mut full_metadata = metadata;
        full_metadata.insert("id".to_string(), id.clone());
        if let Some(text) = text {
            full_metadata.insert("text".to_string(), text);
        }
        if let Some(img) = image_bytes {
            full_metadata.insert("image_base64".to_string(), base64::engine::general_purpose::STANDARD.encode(img));
        }

        // Convert metadata to column vectors
        let mut metadata_cols = HashMap::new();
        for (key, value) in full_metadata {
            metadata_cols.insert(key, vec![value]);
        }

        // Write to dataset
        self.write_vectors(dataset_name, vec![embedding], Some(metadata_cols)).await?;

        Ok(id)
    }

    pub async fn search_with_text(
        &self,
        server: &crate::server::Server,
        dataset_name: &str,
        query_text: String,
        k: usize,
        nprobes: Option<usize>,
        refine_factor: Option<usize>,
    ) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
        // Embed the query text using external service
        let embeddings = self.embed_text(server, vec![query_text]).await?;
        let query_vector = embeddings.into_iter()
            .next()
            .ok_or_else(|| DBError("No embedding returned for query".to_string()))?;

        // Search with the embedding
        self.search_vectors(dataset_name, query_vector, k, nprobes, refine_factor).await
    }

    pub async fn create_index(
        &self,
        dataset_name: &str,
        index_type: &str,
        num_partitions: Option<usize>,
        num_sub_vectors: Option<usize>,
    ) -> Result<(), DBError> {
        let _dataset = self.get_or_open_dataset(dataset_name).await?;

        match index_type.to_uppercase().as_str() {
            "IVF_PQ" => {
                let ivf_params = IvfBuildParams {
                    num_partitions: num_partitions.unwrap_or(256),
                    ..Default::default()
                };
                let pq_params = PQBuildParams {
                    num_sub_vectors: num_sub_vectors.unwrap_or(16),
                    ..Default::default()
                };
                let params = VectorIndexParams::with_ivf_pq_params(
                    MetricType::L2,
                    ivf_params,
                    pq_params,
                );

                // Get a mutable reference to the dataset
                let mut dataset_mut = Dataset::open(self.data_dir.join(format!("{}.lance", dataset_name)).to_str().unwrap())
                    .await
                    .map_err(|e| DBError(format!("Failed to open dataset for indexing: {}", e)))?;

                dataset_mut.create_index(
                    &["vector"],
                    lance_index::IndexType::Vector,
                    None,
                    &params,
                    true
                ).await
                .map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
            }
            _ => return Err(DBError(format!("Unsupported index type: {}", index_type))),
        }

        Ok(())
    }

    async fn get_or_open_dataset(&self, name: &str) -> Result<Arc<Dataset>, DBError> {
        let mut datasets = self.datasets.write().await;

        if let Some(dataset) = datasets.get(name) {
            return Ok(dataset.clone());
        }

        let dataset_path = self.data_dir.join(format!("{}.lance", name));
        if !dataset_path.exists() {
            return Err(DBError(format!("Dataset '{}' does not exist", name)));
        }

        let dataset = Dataset::open(dataset_path.to_str().unwrap())
            .await
            .map_err(|e| DBError(format!("Failed to open dataset: {}", e)))?;

        let dataset = Arc::new(dataset);
        datasets.insert(name.to_string(), dataset.clone());

        Ok(dataset)
    }

    pub async fn list_datasets(&self) -> Result<Vec<String>, DBError> {
        let mut datasets = Vec::new();

        let entries = std::fs::read_dir(&self.data_dir)
            .map_err(|e| DBError(format!("Failed to read data directory: {}", e)))?;

        for entry in entries {
            let entry = entry.map_err(|e| DBError(format!("Failed to read entry: {}", e)))?;
            let path = entry.path();

            if path.is_dir() {
                if let Some(name) = path.file_name() {
                    if let Some(name_str) = name.to_str() {
                        if name_str.ends_with(".lance") {
                            let dataset_name = name_str.trim_end_matches(".lance");
                            datasets.push(dataset_name.to_string());
                        }
                    }
                }
            }
        }

        Ok(datasets)
    }

    pub async fn drop_dataset(&self, name: &str) -> Result<(), DBError> {
        // Remove from cache
        let mut datasets = self.datasets.write().await;
        datasets.remove(name);

        // Delete from disk
        let dataset_path = self.data_dir.join(format!("{}.lance", name));
        if dataset_path.exists() {
            std::fs::remove_dir_all(dataset_path)
                .map_err(|e| DBError(format!("Failed to delete dataset: {}", e)))?;
        }

        Ok(())
    }

    pub async fn get_dataset_info(&self, name: &str) -> Result<HashMap<String, String>, DBError> {
        let dataset = self.get_or_open_dataset(name).await?;

        let mut info = HashMap::new();
        info.insert("name".to_string(), name.to_string());
        info.insert("version".to_string(), dataset.version().version.to_string());
        info.insert("num_rows".to_string(), dataset.count_rows(None).await?.to_string());

        // Get schema info
        let schema = dataset.schema();
        let fields: Vec<String> = schema.fields
            .iter()
            .map(|f| format!("{}:{}", f.name, f.data_type()))
            .collect();
        info.insert("schema".to_string(), fields.join(", "));

        Ok(info)
    }
}
13
src/lib.rs
@@ -1,11 +1,16 @@
pub mod age; // NEW
pub mod age;
pub mod sym;
pub mod cmd;
pub mod crypto;
pub mod error;
pub mod lance_store; // Add Lance store module
pub mod options;
pub mod protocol;
pub mod rpc;
pub mod rpc_server;
pub mod server;
pub mod storage;
pub mod storage_trait; // Add this
pub mod storage_sled; // Add this
pub mod storage_trait;
pub mod storage_sled;
pub mod admin_meta;
pub mod tantivy_search;
pub mod search_cmd;
58
src/main.rs
@@ -3,6 +3,7 @@
use tokio::net::TcpListener;

use herodb::server;
use herodb::rpc_server;

use clap::Parser;

@@ -22,18 +23,29 @@ struct Args {
    #[arg(long)]
    debug: bool,

    /// Master encryption key for encrypted databases
    /// Master encryption key for encrypted databases (deprecated; ignored for data DBs)
    #[arg(long)]
    encryption_key: Option<String>,

    /// Encrypt the database
    /// Encrypt the database (deprecated; ignored for data DBs)
    #[arg(long)]
    encrypt: bool,

    /// Enable RPC management server
    #[arg(long)]
    enable_rpc: bool,

    /// RPC server port (default: 8080)
    #[arg(long, default_value = "8080")]
    rpc_port: u16,

    /// Use the sled backend
    #[arg(long)]
    sled: bool,

    /// Admin secret used to encrypt DB 0 and authorize admin access (required)
    #[arg(long)]
    admin_secret: String,
}

#[tokio::main]
@@ -48,9 +60,19 @@ async fn main() {
        .await
        .unwrap();

    // deprecation warnings for legacy flags
    if args.encrypt || args.encryption_key.is_some() {
        eprintln!("warning: --encrypt and --encryption-key are deprecated and ignored for data DBs. Admin DB 0 is always encrypted with --admin-secret.");
    }
    // basic validation for admin secret
    if args.admin_secret.trim().is_empty() {
        eprintln!("error: --admin-secret must not be empty");
        std::process::exit(2);
    }

    // new DB option
    let option = herodb::options::DBOption {
        dir: args.dir,
        dir: args.dir.clone(),
        port,
        debug: args.debug,
        encryption_key: args.encryption_key,
@@ -60,14 +82,42 @@ async fn main() {
        } else {
            herodb::options::BackendType::Redb
        },
        admin_secret: args.admin_secret.clone(),
    };

    let backend = option.backend.clone();

    // Bootstrap admin DB 0 before opening any server storage
    if let Err(e) = herodb::admin_meta::ensure_bootstrap(&args.dir, backend.clone(), &args.admin_secret) {
        eprintln!("Failed to bootstrap admin DB 0: {}", e.0);
        std::process::exit(2);
    }

    // new server
    let server = server::Server::new(option).await;

    // Add a small delay to ensure the port is ready
    tokio::time::sleep(std::time::Duration::from_millis(100)).await;

    // Start RPC server if enabled
    let _rpc_handle = if args.enable_rpc {
        let rpc_addr = format!("127.0.0.1:{}", args.rpc_port).parse().unwrap();
        let base_dir = args.dir.clone();

        match rpc_server::start_rpc_server(rpc_addr, base_dir, backend, args.admin_secret.clone()).await {
            Ok(handle) => {
                println!("RPC management server started on port {}", args.rpc_port);
                Some(handle)
            }
            Err(e) => {
                eprintln!("Failed to start RPC server: {}", e);
                None
            }
        }
    } else {
        None
    };

    // accept new connections
    loop {
        let stream = listener.accept().await;
src/options.rs
@@ -1,7 +1,8 @@
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BackendType {
    Redb,
    Sled,
    Tantivy, // Full-text search backend (no KV storage)
}

#[derive(Debug, Clone)]
@@ -9,7 +10,11 @@ pub struct DBOption {
    pub dir: String,
    pub port: u16,
    pub debug: bool,
    // Deprecated for data DBs; retained for backward-compat on CLI parsing
    pub encrypt: bool,
    // Deprecated for data DBs; retained for backward-compat on CLI parsing
    pub encryption_key: Option<String>,
    pub backend: BackendType,
    // New: required admin secret, used to encrypt DB 0 and authorize admin operations
    pub admin_secret: String,
}
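For reference, a minimal construction of the new `DBOption` shape; all values below are placeholders.

```rust
use herodb::options::{BackendType, DBOption};

// Sketch: admin_secret is now a required field; the encrypt/encryption_key
// flags remain only for CLI backward compatibility and are ignored for data DBs.
fn example_option() -> DBOption {
    DBOption {
        dir: "/tmp/herodb".to_string(),
        port: 6379,
        debug: false,
        encrypt: false,
        encryption_key: None,
        backend: BackendType::Redb,
        admin_secret: "myadminsecret".to_string(),
    }
}
```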
646
src/rpc.rs
Normal file
@@ -0,0 +1,646 @@
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use jsonrpsee::{core::RpcResult, proc_macros::rpc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};

use crate::server::Server;
use crate::options::DBOption;
use crate::admin_meta;

/// Database backend types
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BackendType {
    Redb,
    Sled,
    Tantivy, // Full-text search backend (no KV storage)
    // Future: InMemory, Custom(String)
}

/// Database configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseConfig {
    pub name: Option<String>,
    pub storage_path: Option<String>,
    pub max_size: Option<u64>,
    pub redis_version: Option<String>,
}

/// Database information returned by metadata queries
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseInfo {
    pub id: u64,
    pub name: Option<String>,
    pub backend: BackendType,
    pub encrypted: bool,
    pub redis_version: Option<String>,
    pub storage_path: Option<String>,
    pub size_on_disk: Option<u64>,
    pub key_count: Option<u64>,
    pub created_at: u64,
    pub last_access: Option<u64>,
}

/// Access permissions for database keys
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum Permissions {
    Read,
    ReadWrite,
}

/// Access key information returned by RPC
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccessKeyInfo {
    pub hash: String,
    pub permissions: Permissions,
    pub created_at: u64,
}

/// Hash a plaintext key using SHA-256
pub fn hash_key(key: &str) -> String {
    let mut hasher = Sha256::new();
    hasher.update(key.as_bytes());
    format!("{:x}", hasher.finalize())
}

/// RPC trait for HeroDB management
#[rpc(server, client, namespace = "herodb")]
pub trait Rpc {
    /// Create a new database with specified configuration
    #[method(name = "createDatabase")]
    async fn create_database(
        &self,
        backend: BackendType,
        config: DatabaseConfig,
        encryption_key: Option<String>,
    ) -> RpcResult<u64>;

    /// Set encryption for an existing database (write-only key)
    #[method(name = "setEncryption")]
    async fn set_encryption(&self, db_id: u64, encryption_key: String) -> RpcResult<bool>;

    /// List all managed databases
    #[method(name = "listDatabases")]
    async fn list_databases(&self) -> RpcResult<Vec<DatabaseInfo>>;

    /// Get detailed information about a specific database
    #[method(name = "getDatabaseInfo")]
    async fn get_database_info(&self, db_id: u64) -> RpcResult<DatabaseInfo>;

    /// Delete a database
    #[method(name = "deleteDatabase")]
    async fn delete_database(&self, db_id: u64) -> RpcResult<bool>;

    /// Get server statistics
    #[method(name = "getServerStats")]
    async fn get_server_stats(&self) -> RpcResult<HashMap<String, serde_json::Value>>;

    /// Add an access key to a database
    #[method(name = "addAccessKey")]
    async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool>;

    /// Delete an access key from a database
    #[method(name = "deleteAccessKey")]
    async fn delete_access_key(&self, db_id: u64, key_hash: String) -> RpcResult<bool>;

    /// List all access keys for a database
    #[method(name = "listAccessKeys")]
    async fn list_access_keys(&self, db_id: u64) -> RpcResult<Vec<AccessKeyInfo>>;

    /// Set database public/private status
    #[method(name = "setDatabasePublic")]
    async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool>;

    // ----- Full-text (Tantivy) minimal RPC endpoints -----

    /// Create a new FT index in a Tantivy-backed DB
    #[method(name = "ftCreate")]
    async fn ft_create(
        &self,
        db_id: u64,
        index_name: String,
        schema: Vec<(String, String, Vec<String>)>,
    ) -> RpcResult<bool>;

    /// Add or replace a document in an FT index
    #[method(name = "ftAdd")]
    async fn ft_add(
        &self,
        db_id: u64,
        index_name: String,
        doc_id: String,
        score: f64,
        fields: HashMap<String, String>,
    ) -> RpcResult<bool>;

    /// Search an FT index
    #[method(name = "ftSearch")]
    async fn ft_search(
        &self,
        db_id: u64,
        index_name: String,
        query: String,
        filters: Option<Vec<(String, String)>>,
        limit: Option<usize>,
        offset: Option<usize>,
        return_fields: Option<Vec<String>>,
    ) -> RpcResult<serde_json::Value>;

    /// Delete a document by id from an FT index
    #[method(name = "ftDel")]
    async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool>;

    /// Get FT index info
    #[method(name = "ftInfo")]
    async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value>;

    /// Drop an FT index
    #[method(name = "ftDrop")]
    async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool>;
}

/// RPC Server implementation
pub struct RpcServerImpl {
    /// Base directory for database files
    base_dir: String,
    /// Managed database servers
    servers: Arc<RwLock<HashMap<u64, Arc<Server>>>>,
    /// Default backend type
    backend: crate::options::BackendType,
    /// Admin secret used to encrypt DB 0 and authorize admin access
    admin_secret: String,
}

impl RpcServerImpl {
    /// Create a new RPC server instance
    pub fn new(base_dir: String, backend: crate::options::BackendType, admin_secret: String) -> Self {
        Self {
            base_dir,
            servers: Arc::new(RwLock::new(HashMap::new())),
            backend,
            admin_secret,
        }
    }

    /// Get or create a server instance for the given database ID
    async fn get_or_create_server(&self, db_id: u64) -> Result<Arc<Server>, jsonrpsee::types::ErrorObjectOwned> {
        // Check if server already exists
        {
            let servers = self.servers.read().await;
            if let Some(server) = servers.get(&db_id) {
                return Ok(server.clone());
            }
        }

        // Validate existence via admin DB 0 (metadata), not filesystem presence
        let exists = admin_meta::db_exists(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        if !exists {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(
                -32000,
                format!("Database {} not found", db_id),
                None::<()>
            ));
        }

        // Resolve effective backend for this db from admin meta or filesystem; fallback to default
        let meta_backend = admin_meta::get_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
            .ok()
            .flatten();
        let db_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}.db", db_id));
        let sniffed_backend = if db_path.exists() {
            if db_path.is_file() {
                Some(crate::options::BackendType::Redb)
            } else if db_path.is_dir() {
                Some(crate::options::BackendType::Sled)
            } else {
                None
            }
        } else {
            None
        };
        let effective_backend = meta_backend.clone().or(sniffed_backend).unwrap_or(self.backend.clone());
        if effective_backend != self.backend {
            eprintln!(
                "notice: get_or_create_server: db {} backend resolved to {:?} (server default {:?})",
                db_id, effective_backend, self.backend
            );
        }
        // If we had to sniff (no meta), persist the resolved backend
        if meta_backend.is_none() {
            let _ = admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, effective_backend.clone());
        }

        // Create server instance with resolved backend
        let is_tantivy = matches!(effective_backend, crate::options::BackendType::Tantivy);
        let db_option = DBOption {
            dir: self.base_dir.clone(),
            port: 0, // Not used for RPC-managed databases
            debug: false,
            encryption_key: None,
            encrypt: false,
            backend: effective_backend.clone(),
            admin_secret: self.admin_secret.clone(),
        };

        let mut server = Server::new(db_option).await;

        // Set the selected database to the db_id
        server.selected_db = db_id;

        // Lazily open/create physical storage according to admin meta (per-db encryption)
        // Skip for Tantivy backend (no KV storage to open)
        if !is_tantivy {
            let _ = server.current_storage();
        }

        // Store the server
        let mut servers = self.servers.write().await;
        servers.insert(db_id, Arc::new(server.clone()));

        Ok(Arc::new(server))
    }

    /// Discover existing database IDs from admin DB 0
    async fn discover_databases(&self) -> Vec<u64> {
        admin_meta::list_dbs(&self.base_dir, self.backend.clone(), &self.admin_secret)
            .unwrap_or_default()
    }

    /// Build database file path for given server/db_id
    fn db_file_path(&self, server: &Server, db_id: u64) -> std::path::PathBuf {
        std::path::PathBuf::from(&server.option.dir).join(format!("{}.db", db_id))
    }

    /// Recursively compute size on disk for the database path
    fn compute_size_on_disk(&self, path: &std::path::Path) -> Option<u64> {
        fn dir_size(p: &std::path::Path) -> u64 {
            if p.is_file() {
                std::fs::metadata(p).map(|m| m.len()).unwrap_or(0)
            } else if p.is_dir() {
                let mut total = 0u64;
                if let Ok(read) = std::fs::read_dir(p) {
                    for entry in read.flatten() {
                        total += dir_size(&entry.path());
                    }
                }
                total
            } else {
                0
            }
        }
        Some(dir_size(path))
    }

    /// Extract created and last access times (secs) from a path, with fallbacks
    fn get_file_times_secs(path: &std::path::Path) -> (u64, Option<u64>) {
        let now = std::time::SystemTime::now();
        let created = std::fs::metadata(path)
            .and_then(|m| m.created().or_else(|_| m.modified()))
            .unwrap_or(now)
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();

        let last_access = std::fs::metadata(path)
            .and_then(|m| m.accessed())
            .ok()
            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok().map(|d| d.as_secs()));

        (created, last_access)
    }

    /// Compose a DatabaseInfo by probing storage and filesystem, with admin meta for access key count
    async fn build_database_info(&self, db_id: u64, server: &Server) -> DatabaseInfo {
        // Probe storage to determine encryption state
        let storage = server.current_storage().ok();
        let encrypted = storage.as_ref().map(|s| s.is_encrypted()).unwrap_or(server.option.encrypt);

        // Get actual key count from storage
        let key_count = storage.as_ref()
            .and_then(|s| s.dbsize().ok())
            .map(|count| count as u64);

        // Get database name from admin meta
        let name = admin_meta::get_database_name(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
            .ok()
            .flatten();

        // Compute size on disk and timestamps from the DB file path
        let db_path = self.db_file_path(server, db_id);
        let size_on_disk = self.compute_size_on_disk(&db_path);
        let (created_at, last_access) = Self::get_file_times_secs(&db_path);

        let backend = match server.option.backend {
            crate::options::BackendType::Redb => BackendType::Redb,
            crate::options::BackendType::Sled => BackendType::Sled,
            crate::options::BackendType::Tantivy => BackendType::Tantivy,
        };

        DatabaseInfo {
            id: db_id,
            name,
            backend,
            encrypted,
            redis_version: Some("7.0".to_string()),
            storage_path: Some(server.option.dir.clone()),
            size_on_disk,
            key_count,
            created_at,
            last_access,
        }
    }
}

#[jsonrpsee::core::async_trait]
impl RpcServer for RpcServerImpl {
    async fn create_database(
        &self,
        backend: BackendType,
        config: DatabaseConfig,
        encryption_key: Option<String>,
    ) -> RpcResult<u64> {
        // Allocate new ID via admin DB 0
        let db_id = admin_meta::allocate_next_id(&self.base_dir, self.backend.clone(), &self.admin_secret)
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;

        // Persist per-db encryption key in admin DB 0 if provided
        if let Some(ref key) = encryption_key {
            admin_meta::set_enc_key(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, key)
                .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        }

        // Persist database name if provided
        if let Some(ref name) = config.name {
            admin_meta::set_database_name(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, name)
                .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        }

        // Ensure base dir exists
        if let Err(e) = std::fs::create_dir_all(&self.base_dir) {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, format!("Failed to ensure base dir: {}", e), None::<()>));
        }

        // Map RPC backend to options backend and persist it in admin meta for this db id
        let opt_backend = match backend {
            BackendType::Redb => crate::options::BackendType::Redb,
            BackendType::Sled => crate::options::BackendType::Sled,
            BackendType::Tantivy => crate::options::BackendType::Tantivy,
        };
        admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, opt_backend.clone())
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;

        // Create server instance using base_dir, chosen backend and admin secret
        let is_tantivy_new = matches!(opt_backend, crate::options::BackendType::Tantivy);
        let option = DBOption {
            dir: self.base_dir.clone(),
            port: 0, // Not used for RPC-managed databases
            debug: false,
            encryption_key: None, // per-db key is stored in admin DB 0
            encrypt: false, // encryption decided per-db at open time
            backend: opt_backend.clone(),
            admin_secret: self.admin_secret.clone(),
        };

        let mut server = Server::new(option).await;
        server.selected_db = db_id;

        // Initialize storage to create physical <id>.db with proper encryption from admin meta
        // Skip for Tantivy backend (no KV storage to initialize)
        if !is_tantivy_new {
            let _ = server.current_storage();
        }

        // Store the server in cache
        let mut servers = self.servers.write().await;
        servers.insert(db_id, Arc::new(server));

        Ok(db_id)
    }

    async fn set_encryption(&self, _db_id: u64, _encryption_key: String) -> RpcResult<bool> {
        // For now, return false as encryption can only be set during creation
        let _servers = self.servers.read().await;
        // TODO: Implement encryption setting for existing databases
        Ok(false)
    }

    async fn list_databases(&self) -> RpcResult<Vec<DatabaseInfo>> {
        let db_ids = self.discover_databases().await;
        let mut result = Vec::new();

        for db_id in db_ids {
            if let Ok(server) = self.get_or_create_server(db_id).await {
                // Build accurate info from storage/meta/fs
                let info = self.build_database_info(db_id, &server).await;
                result.push(info);
            }
        }

        Ok(result)
    }

    async fn get_database_info(&self, db_id: u64) -> RpcResult<DatabaseInfo> {
        let server = self.get_or_create_server(db_id).await?;
        // Build accurate info from storage/meta/fs
        let info = self.build_database_info(db_id, &server).await;
        Ok(info)
    }

    async fn delete_database(&self, db_id: u64) -> RpcResult<bool> {
        let mut servers = self.servers.write().await;

        if let Some(_server) = servers.remove(&db_id) {
            // Clean up database files
            let db_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}.db", db_id));
            if db_path.exists() {
                if db_path.is_dir() {
                    std::fs::remove_dir_all(&db_path).ok();
                } else {
                    std::fs::remove_file(&db_path).ok();
                }
            }
            Ok(true)
        } else {
            Ok(false)
        }
    }

    async fn get_server_stats(&self) -> RpcResult<HashMap<String, serde_json::Value>> {
        let db_ids = self.discover_databases().await;
        let mut stats = HashMap::new();

        stats.insert("total_databases".to_string(), serde_json::json!(db_ids.len()));
        stats.insert("uptime".to_string(), serde_json::json!(
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_secs()
        ));

        Ok(stats)
    }

    // ----- Full-text (Tantivy) minimal RPC endpoints -----

    async fn ft_create(
        &self,
        db_id: u64,
        index_name: String,
        schema: Vec<(String, String, Vec<String>)>,
    ) -> RpcResult<bool> {
        let server = self.get_or_create_server(db_id).await?;
        if db_id == 0 {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
        }
        if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
        }
        crate::search_cmd::ft_create_cmd(&*server, index_name, schema)
            .await
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        Ok(true)
    }

    async fn ft_add(
        &self,
        db_id: u64,
        index_name: String,
        doc_id: String,
        score: f64,
        fields: HashMap<String, String>,
    ) -> RpcResult<bool> {
        let server = self.get_or_create_server(db_id).await?;
        if db_id == 0 {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
        }
        if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
        }
        crate::search_cmd::ft_add_cmd(&*server, index_name, doc_id, score, fields)
            .await
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        Ok(true)
    }

    async fn ft_search(
        &self,
        db_id: u64,
        index_name: String,
        query: String,
        filters: Option<Vec<(String, String)>>,
        limit: Option<usize>,
        offset: Option<usize>,
        return_fields: Option<Vec<String>>,
    ) -> RpcResult<serde_json::Value> {
        let server = self.get_or_create_server(db_id).await?;
        if db_id == 0 {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
        }
        if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
        }
        let proto = crate::search_cmd::ft_search_cmd(
            &*server,
            index_name,
            query,
            filters.unwrap_or_default(),
            limit,
            offset,
            return_fields,
        )
        .await
        .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        Ok(serde_json::json!({ "resp": proto.encode() }))
    }

    async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool> {
        let server = self.get_or_create_server(db_id).await?;
        if db_id == 0 {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
        }
        if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
        }
        crate::search_cmd::ft_del_cmd(&*server, index_name, doc_id)
            .await
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        Ok(true)
    }

    async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value> {
        let server = self.get_or_create_server(db_id).await?;
        if db_id == 0 {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
        }
        if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
        }
        let proto = crate::search_cmd::ft_info_cmd(&*server, index_name)
            .await
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        Ok(serde_json::json!({ "resp": proto.encode() }))
    }

    async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool> {
        let server = self.get_or_create_server(db_id).await?;
        if db_id == 0 {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
        }
        if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
            return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
        }
        crate::search_cmd::ft_drop_cmd(&*server, index_name)
            .await
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        Ok(true)
    }

    async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool> {
        let perms = match permissions.to_lowercase().as_str() {
            "read" => Permissions::Read,
            "readwrite" => Permissions::ReadWrite,
            _ => return Err(jsonrpsee::types::ErrorObjectOwned::owned(
                -32000,
                "Invalid permissions: use 'read' or 'readwrite'",
                None::<()>
            )),
        };

        admin_meta::add_access_key(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, &key, perms)
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        Ok(true)
    }

    async fn delete_access_key(&self, db_id: u64, key_hash: String) -> RpcResult<bool> {
        let ok = admin_meta::delete_access_key(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, &key_hash)
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        Ok(ok)
    }

    async fn list_access_keys(&self, db_id: u64) -> RpcResult<Vec<AccessKeyInfo>> {
        let pairs = admin_meta::list_access_keys(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        let keys: Vec<AccessKeyInfo> = pairs.into_iter().map(|(hash, perm, ts)| AccessKeyInfo {
            hash,
            permissions: perm,
            created_at: ts,
        }).collect();
        Ok(keys)
    }

    async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool> {
        admin_meta::set_database_public(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, public)
            .map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
        Ok(true)
    }
}
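Because the trait is declared with `#[rpc(server, client, ...)]`, jsonrpsee also generates a typed `RpcClient` trait from the same definitions. A sketch of driving the management API from Rust, assuming a HeroDB started with `--enable-rpc` on port 8080:

```rust
use herodb::rpc::{BackendType, DatabaseConfig, RpcClient};
use jsonrpsee::http_client::HttpClientBuilder;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = HttpClientBuilder::default().build("http://127.0.0.1:8080")?;
    // Calls herodb_createDatabase under the hood (namespace + method name).
    let db_id = client
        .create_database(
            BackendType::Redb,
            DatabaseConfig {
                name: Some("demo".to_string()),
                storage_path: None,
                max_size: None,
                redis_version: None,
            },
            None,
        )
        .await?;
    println!("created database {}", db_id);
    Ok(())
}
```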
49
src/rpc_server.rs
Normal file
@@ -0,0 +1,49 @@
use std::net::SocketAddr;
use jsonrpsee::server::{ServerBuilder, ServerHandle};
use jsonrpsee::RpcModule;

use crate::rpc::{RpcServer, RpcServerImpl};

/// Start the RPC server on the specified address
pub async fn start_rpc_server(addr: SocketAddr, base_dir: String, backend: crate::options::BackendType, admin_secret: String) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> {
    // Create the RPC server implementation
    let rpc_impl = RpcServerImpl::new(base_dir, backend, admin_secret);

    // Create the RPC module
    let mut module = RpcModule::new(());
    module.merge(RpcServer::into_rpc(rpc_impl))?;

    // Build the server with both HTTP and WebSocket support
    let server = ServerBuilder::default()
        .build(addr)
        .await?;

    // Start the server
    let handle = server.start(module);

    println!("RPC server started on {}", addr);

    Ok(handle)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    #[tokio::test]
    async fn test_rpc_server_startup() {
        let addr = "127.0.0.1:0".parse().unwrap(); // Use port 0 for auto-assignment
        let base_dir = "/tmp/test_rpc".to_string();
        let backend = crate::options::BackendType::Redb; // Default for test

        let handle = start_rpc_server(addr, base_dir, backend, "test-admin".to_string()).await.unwrap();

        // Give the server a moment to start
        tokio::time::sleep(Duration::from_millis(100)).await;

        // Stop the server
        handle.stop().unwrap();
        handle.stopped().await;
    }
}
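On the wire, the `namespace = "herodb"` attribute prefixes every method name, so non-Rust clients call e.g. `herodb_getServerStats`. A sketch of the raw JSON-RPC envelope such a client would POST to the RPC port (field values are placeholders):

```rust
fn stats_request_body() -> serde_json::Value {
    // Raw JSON-RPC 2.0 envelope for the getServerStats method.
    serde_json::json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "herodb_getServerStats",
        "params": []
    })
}
```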
352
src/search_cmd.rs
Normal file
@@ -0,0 +1,352 @@
use crate::{
    error::DBError,
    protocol::Protocol,
    server::Server,
    tantivy_search::{
        FieldDef, Filter, FilterType, IndexConfig, NumericType, SearchOptions, TantivySearch,
    },
};
use std::collections::HashMap;
use std::sync::Arc;

pub async fn ft_create_cmd(
    server: &Server,
    index_name: String,
    schema: Vec<(String, String, Vec<String>)>,
) -> Result<Protocol, DBError> {
    if server.selected_db == 0 {
        return Ok(Protocol::err("FT commands are not allowed on DB 0"));
    }
    // Enforce Tantivy backend for selected DB
    let is_tantivy = crate::admin_meta::get_database_backend(
        &server.option.dir,
        server.option.backend.clone(),
        &server.option.admin_secret,
        server.selected_db,
    )
    .ok()
    .flatten()
    .map(|b| matches!(b, crate::options::BackendType::Tantivy))
    .unwrap_or(false);
    if !is_tantivy {
        return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
    }

    // Parse schema into field definitions
    let mut field_definitions = Vec::new();
    for (field_name, field_type, options) in schema {
        let field_def = match field_type.to_uppercase().as_str() {
            "TEXT" => {
                let mut sortable = false;
                let mut no_index = false;
                // Weight is not used in current implementation
                let mut _weight = 1.0f32;
                let mut i = 0;
                while i < options.len() {
                    match options[i].to_uppercase().as_str() {
                        "WEIGHT" => {
                            if i + 1 < options.len() {
                                _weight = options[i + 1].parse::<f32>().unwrap_or(1.0);
                                i += 2;
                                continue;
                            }
                        }
                        "SORTABLE" => {
                            sortable = true;
                        }
                        "NOINDEX" => {
                            no_index = true;
                        }
                        _ => {}
                    }
                    i += 1;
                }
                FieldDef::Text {
                    stored: true,
                    indexed: !no_index,
                    tokenized: true,
                    fast: sortable,
                }
            }
            "NUMERIC" => {
                // default to F64
                let mut sortable = false;
                for opt in &options {
                    if opt.to_uppercase() == "SORTABLE" {
                        sortable = true;
                    }
                }
                FieldDef::Numeric {
                    stored: true,
                    indexed: true,
                    fast: sortable,
                    precision: NumericType::F64,
                }
            }
            "TAG" => {
                let mut separator = ",".to_string();
                let mut case_sensitive = false;
                let mut i = 0;
                while i < options.len() {
                    match options[i].to_uppercase().as_str() {
                        "SEPARATOR" => {
                            if i + 1 < options.len() {
                                separator = options[i + 1].clone();
                                i += 2;
                                continue;
                            }
                        }
                        "CASESENSITIVE" => {
                            case_sensitive = true;
                        }
                        _ => {}
                    }
                    i += 1;
                }
                FieldDef::Tag {
                    stored: true,
                    separator,
                    case_sensitive,
                }
            }
            "GEO" => FieldDef::Geo { stored: true },
            _ => {
                return Err(DBError(format!("Unknown field type: {}", field_type)));
            }
        };
        field_definitions.push((field_name, field_def));
    }

    // Create the search index
    let search_path = server.search_index_path();
    let config = IndexConfig::default();
    let search_index = TantivySearch::new_with_schema(
        search_path,
        index_name.clone(),
        field_definitions,
        Some(config),
    )?;

    // Store in registry
    let mut indexes = server.search_indexes.write().unwrap();
    indexes.insert(index_name, Arc::new(search_index));

    Ok(Protocol::SimpleString("OK".to_string()))
}

pub async fn ft_add_cmd(
    server: &Server,
    index_name: String,
    doc_id: String,
    _score: f64,
    fields: HashMap<String, String>,
) -> Result<Protocol, DBError> {
    if server.selected_db == 0 {
        return Ok(Protocol::err("FT commands are not allowed on DB 0"));
    }
    // Enforce Tantivy backend for selected DB
    let is_tantivy = crate::admin_meta::get_database_backend(
        &server.option.dir,
        server.option.backend.clone(),
        &server.option.admin_secret,
        server.selected_db,
    )
    .ok()
    .flatten()
    .map(|b| matches!(b, crate::options::BackendType::Tantivy))
    .unwrap_or(false);
    if !is_tantivy {
        return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
    }
    let indexes = server.search_indexes.read().unwrap();
    let search_index = indexes
        .get(&index_name)
        .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
    search_index.add_document_with_fields(&doc_id, fields)?;
    Ok(Protocol::SimpleString("OK".to_string()))
}

pub async fn ft_search_cmd(
    server: &Server,
    index_name: String,
    query: String,
    filters: Vec<(String, String)>,
    limit: Option<usize>,
    offset: Option<usize>,
    return_fields: Option<Vec<String>>,
) -> Result<Protocol, DBError> {
    if server.selected_db == 0 {
        return Ok(Protocol::err("FT commands are not allowed on DB 0"));
    }
    // Enforce Tantivy backend for selected DB
    let is_tantivy = crate::admin_meta::get_database_backend(
        &server.option.dir,
        server.option.backend.clone(),
        &server.option.admin_secret,
        server.selected_db,
    )
    .ok()
    .flatten()
    .map(|b| matches!(b, crate::options::BackendType::Tantivy))
    .unwrap_or(false);
    if !is_tantivy {
        return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
    }
    let indexes = server.search_indexes.read().unwrap();
    let search_index = indexes
        .get(&index_name)
        .ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;

    let search_filters = filters
        .into_iter()
        .map(|(field, value)| Filter {
            field,
            filter_type: FilterType::Equals(value),
        })
        .collect();

    let options = SearchOptions {
        limit: limit.unwrap_or(10),
        offset: offset.unwrap_or(0),
        filters: search_filters,
        sort_by: None,
        return_fields,
        highlight: false,
    };

    let results = search_index.search_with_options(&query, options)?;

    // Format results as Redis protocol
    let mut response = Vec::new();
    // First element is the total count
    response.push(Protocol::SimpleString(results.total.to_string()));
    // Then each document
    for doc in results.documents {
        let mut doc_array = Vec::new();
        // Add document ID if it exists
        if let Some(id) = doc.fields.get("_id") {
            doc_array.push(Protocol::BulkString(id.clone()));
        }
        // Add score
        doc_array.push(Protocol::BulkString(doc.score.to_string()));
|
||||
// Add fields as key-value pairs
|
||||
for (field_name, field_value) in doc.fields {
|
||||
if field_name != "_id" {
|
||||
doc_array.push(Protocol::BulkString(field_name));
|
||||
doc_array.push(Protocol::BulkString(field_value));
|
||||
}
|
||||
}
|
||||
response.push(Protocol::Array(doc_array));
|
||||
}
|
||||
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
|
||||
pub async fn ft_del_cmd(
|
||||
server: &Server,
|
||||
index_name: String,
|
||||
doc_id: String,
|
||||
) -> Result<Protocol, DBError> {
|
||||
if server.selected_db == 0 {
|
||||
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
|
||||
}
|
||||
// Enforce Tantivy backend for selected DB
|
||||
let is_tantivy = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
if !is_tantivy {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
let _search_index = indexes
|
||||
.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
// Not fully implemented yet: Tantivy delete by term would require a writer session and commit coordination.
|
||||
println!("Deleting document '{}' from index '{}'", doc_id, index_name);
|
||||
Ok(Protocol::SimpleString("1".to_string()))
|
||||
}
|
||||
|
||||
pub async fn ft_info_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
|
||||
if server.selected_db == 0 {
|
||||
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
|
||||
}
|
||||
// Enforce Tantivy backend for selected DB
|
||||
let is_tantivy = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
if !is_tantivy {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
let indexes = server.search_indexes.read().unwrap();
|
||||
let search_index = indexes
|
||||
.get(&index_name)
|
||||
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
|
||||
let info = search_index.get_info()?;
|
||||
|
||||
// Format info as Redis protocol
|
||||
let mut response = Vec::new();
|
||||
response.push(Protocol::BulkString("index_name".to_string()));
|
||||
response.push(Protocol::BulkString(info.name));
|
||||
response.push(Protocol::BulkString("num_docs".to_string()));
|
||||
response.push(Protocol::BulkString(info.num_docs.to_string()));
|
||||
response.push(Protocol::BulkString("num_fields".to_string()));
|
||||
response.push(Protocol::BulkString(info.fields.len().to_string()));
|
||||
response.push(Protocol::BulkString("fields".to_string()));
|
||||
let fields_str = info
|
||||
.fields
|
||||
.iter()
|
||||
.map(|f| format!("{}:{}", f.name, f.field_type))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
response.push(Protocol::BulkString(fields_str));
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
|
||||
pub async fn ft_drop_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
|
||||
if server.selected_db == 0 {
|
||||
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
|
||||
}
|
||||
// Enforce Tantivy backend for selected DB
|
||||
let is_tantivy = crate::admin_meta::get_database_backend(
|
||||
&server.option.dir,
|
||||
server.option.backend.clone(),
|
||||
&server.option.admin_secret,
|
||||
server.selected_db,
|
||||
)
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
|
||||
.unwrap_or(false);
|
||||
if !is_tantivy {
|
||||
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
|
||||
}
|
||||
|
||||
// Remove from registry
|
||||
{
|
||||
let mut indexes = server.search_indexes.write().unwrap();
|
||||
indexes.remove(&index_name);
|
||||
}
|
||||
|
||||
// Remove the index files from disk
|
||||
let index_path = server.search_index_path().join(&index_name);
|
||||
if index_path.exists() {
|
||||
std::fs::remove_dir_all(&index_path)
|
||||
.map_err(|e| DBError(format!("Failed to remove index files: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(Protocol::SimpleString("OK".to_string()))
|
||||
}
|
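The FT.* handlers above all follow one pattern: reject DB 0, verify the selected database's backend is Tantivy, then resolve the index from the server's in-memory registry. A minimal sketch of driving them directly (the `demo` wrapper is illustrative and not part of this changeset; the `herodb::search_cmd` module path is assumed from the file layout):

```rust
use std::collections::HashMap;
use herodb::error::DBError;
use herodb::search_cmd::{ft_add_cmd, ft_create_cmd, ft_search_cmd};
use herodb::server::Server;

// Hypothetical driver; assumes `server` already has a non-zero,
// Tantivy-backed database selected.
async fn demo(server: &Server) -> Result<(), DBError> {
    // Schema tuples are (field_name, field_type, options), mirroring FT.CREATE.
    ft_create_cmd(
        server,
        "products".to_string(),
        vec![
            ("title".to_string(), "TEXT".to_string(), vec!["SORTABLE".to_string()]),
            ("price".to_string(), "NUMERIC".to_string(), vec![]),
        ],
    )
    .await?;

    // FT.ADD: document fields arrive as a flat name -> value map.
    let mut fields = HashMap::new();
    fields.insert("title".to_string(), "red bicycle".to_string());
    fields.insert("price".to_string(), "199.0".to_string());
    ft_add_cmd(server, "products".to_string(), "doc1".to_string(), 1.0, fields).await?;

    // FT.SEARCH: free-text query, no filters, default pagination.
    let _reply = ft_search_cmd(
        server,
        "products".to_string(),
        "bicycle".to_string(),
        vec![],
        None,
        None,
        None,
    )
    .await?;
    Ok(())
}
```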
104
src/server.rs
@@ -9,12 +9,10 @@ use std::sync::atomic::{AtomicU64, Ordering};
 
 use crate::cmd::Cmd;
 use crate::error::DBError;
-use crate::lance_store::LanceStore;
 use crate::options;
 use crate::protocol::Protocol;
-use crate::storage::Storage;
-use crate::storage_sled::SledStorage;
 use crate::storage_trait::StorageBackend;
+use crate::admin_meta;
 
 #[derive(Clone)]
 pub struct Server {
@@ -23,13 +21,14 @@ pub struct Server {
     pub client_name: Option<String>,
     pub selected_db: u64, // Changed from usize to u64
     pub queued_cmd: Option<Vec<(Cmd, Protocol)>>,
+    pub current_permissions: Option<crate::rpc::Permissions>,
+
+    // In-memory registry of Tantivy search indexes for this server
+    pub search_indexes: Arc<std::sync::RwLock<HashMap<String, Arc<crate::tantivy_search::TantivySearch>>>>,
 
     // BLPOP waiter registry: per (db_index, key) FIFO of waiters
     pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>,
     pub waiter_seq: Arc<AtomicU64>,
-
-    // Lance vector store
-    pub lance_store: Option<Arc<LanceStore>>,
 }
 
 pub struct Waiter {
@@ -46,81 +45,70 @@ pub enum PopSide {
 
 impl Server {
     pub async fn new(option: options::DBOption) -> Self {
-        // Initialize Lance store
-        let lance_data_dir = std::path::PathBuf::from(&option.dir).join("lance");
-        let lance_store = match LanceStore::new(lance_data_dir).await {
-            Ok(store) => Some(Arc::new(store)),
-            Err(e) => {
-                eprintln!("Warning: Failed to initialize Lance store: {}", e.0);
-                None
-            }
-        };
-
         Server {
             db_cache: Arc::new(std::sync::RwLock::new(HashMap::new())),
             option,
             client_name: None,
             selected_db: 0,
             queued_cmd: None,
+            current_permissions: None,
+            search_indexes: Arc::new(std::sync::RwLock::new(HashMap::new())),
             list_waiters: Arc::new(Mutex::new(HashMap::new())),
             waiter_seq: Arc::new(AtomicU64::new(1)),
-            lance_store,
         }
     }
 
-    pub fn lance_store(&self) -> Result<Arc<LanceStore>, DBError> {
-        self.lance_store
-            .as_ref()
-            .cloned()
-            .ok_or_else(|| DBError("Lance store not initialized".to_string()))
-    }
+    // Path where search indexes are stored, namespaced per selected DB:
+    // <base_dir>/search_indexes/<db_id>
+    pub fn search_index_path(&self) -> std::path::PathBuf {
+        let base = std::path::PathBuf::from(&self.option.dir)
+            .join("search_indexes")
+            .join(self.selected_db.to_string());
+        if !base.exists() {
+            let _ = std::fs::create_dir_all(&base);
+        }
+        base
+    }
 
     pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> {
         let mut cache = self.db_cache.write().unwrap();
 
         if let Some(storage) = cache.get(&self.selected_db) {
            return Ok(storage.clone());
         }
 
-        // Create new database file
-        let db_file_path = std::path::PathBuf::from(self.option.dir.clone())
-            .join(format!("{}.db", self.selected_db));
-
-        // Ensure the directory exists before creating the database file
-        if let Some(parent_dir) = db_file_path.parent() {
-            std::fs::create_dir_all(parent_dir).map_err(|e| {
-                DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e))
-            })?;
-        }
-
-        println!("Creating new db file: {}", db_file_path.display());
-
-        let storage: Arc<dyn StorageBackend> = match self.option.backend {
-            options::BackendType::Redb => {
-                Arc::new(Storage::new(
-                    db_file_path,
-                    self.should_encrypt_db(self.selected_db),
-                    self.option.encryption_key.as_deref()
-                )?)
-            }
-            options::BackendType::Sled => {
-                Arc::new(SledStorage::new(
-                    db_file_path,
-                    self.should_encrypt_db(self.selected_db),
-                    self.option.encryption_key.as_deref()
-                )?)
-            }
-        };
+        // Use process-wide shared handles to avoid sled/redb double-open lock contention.
+        let storage = if self.selected_db == 0 {
+            // Admin DB 0: always via singleton
+            admin_meta::open_admin_storage(
+                &self.option.dir,
+                self.option.backend.clone(),
+                &self.option.admin_secret,
+            )?
+        } else {
+            // Data DBs: via global registry keyed by id
+            admin_meta::open_data_storage(
+                &self.option.dir,
+                self.option.backend.clone(),
+                &self.option.admin_secret,
+                self.selected_db,
+            )?
+        };
 
         cache.insert(self.selected_db, storage.clone());
         Ok(storage)
     }
 
-    fn should_encrypt_db(&self, db_index: u64) -> bool {
-        // DB 0-9 are non-encrypted, DB 10+ are encrypted
-        self.option.encrypt && db_index >= 10
-    }
+    /// Check if current permissions allow read operations
+    pub fn has_read_permission(&self) -> bool {
+        matches!(self.current_permissions, Some(crate::rpc::Permissions::Read) | Some(crate::rpc::Permissions::ReadWrite))
+    }
 
+    /// Check if current permissions allow write operations
+    pub fn has_write_permission(&self) -> bool {
+        matches!(self.current_permissions, Some(crate::rpc::Permissions::ReadWrite))
+    }
 
     // ----- BLPOP waiter helpers -----
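The two permission helpers are the hook the command layer can use to gate reads and writes per connection. A minimal sketch of such a guard (hypothetical; the actual dispatch site and the exact error text live in the command handling code, not here):

```rust
// Hypothetical write guard built on Server::has_write_permission();
// returns an error reply to send instead of executing the command.
fn write_guard(server: &Server) -> Option<Protocol> {
    if !server.has_write_permission() {
        // Error wording is an assumption; HeroDB only guarantees an error reply.
        return Some(Protocol::err("ERR write permission denied"));
    }
    None
}
```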
123
src/sym.rs
Normal file
@@ -0,0 +1,123 @@
//! sym.rs — Stateless symmetric encryption (Phase 1)
//!
//! Commands implemented (RESP):
//! - SYM KEYGEN
//! - SYM ENCRYPT <key_b64> <message>
//! - SYM DECRYPT <key_b64> <ciphertext_b64>
//!
//! Notes:
//! - Raw key: exactly 32 bytes, provided as Base64 in commands.
//! - Cipher: XChaCha20-Poly1305 (AEAD) without AAD in Phase 1
//! - Ciphertext binary layout: [version:1][nonce:24][ciphertext||tag]
//! - Encoding for wire I/O: Base64

use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
use chacha20poly1305::{
    aead::{Aead, KeyInit, OsRng},
    XChaCha20Poly1305, XNonce,
};
use rand::RngCore;

use crate::protocol::Protocol;

const VERSION: u8 = 1;
const NONCE_LEN: usize = 24;
const TAG_LEN: usize = 16;

#[derive(Debug)]
pub enum SymWireError {
    InvalidKey,
    BadEncoding,
    BadFormat,
    BadVersion(u8),
    Crypto,
}

impl SymWireError {
    fn to_protocol(self) -> Protocol {
        match self {
            SymWireError::InvalidKey => Protocol::err("ERR sym: invalid key"),
            SymWireError::BadEncoding => Protocol::err("ERR sym: bad encoding"),
            SymWireError::BadFormat => Protocol::err("ERR sym: bad format"),
            SymWireError::BadVersion(v) => Protocol::err(&format!("ERR sym: unsupported version {}", v)),
            SymWireError::Crypto => Protocol::err("ERR sym: auth failed"),
        }
    }
}

fn decode_key_b64(s: &str) -> Result<chacha20poly1305::Key, SymWireError> {
    let bytes = B64.decode(s.as_bytes()).map_err(|_| SymWireError::BadEncoding)?;
    if bytes.len() != 32 {
        return Err(SymWireError::InvalidKey);
    }
    Ok(chacha20poly1305::Key::from_slice(&bytes).to_owned())
}

fn encrypt_blob(key: &chacha20poly1305::Key, plaintext: &[u8]) -> Result<Vec<u8>, SymWireError> {
    let cipher = XChaCha20Poly1305::new(key);

    let mut nonce_bytes = [0u8; NONCE_LEN];
    OsRng.fill_bytes(&mut nonce_bytes);
    let nonce = XNonce::from_slice(&nonce_bytes);

    let mut out = Vec::with_capacity(1 + NONCE_LEN + plaintext.len() + TAG_LEN);
    out.push(VERSION);
    out.extend_from_slice(&nonce_bytes);

    let ct = cipher.encrypt(nonce, plaintext).map_err(|_| SymWireError::Crypto)?;
    out.extend_from_slice(&ct);
    Ok(out)
}

fn decrypt_blob(key: &chacha20poly1305::Key, blob: &[u8]) -> Result<Vec<u8>, SymWireError> {
    if blob.len() < 1 + NONCE_LEN + TAG_LEN {
        return Err(SymWireError::BadFormat);
    }
    let ver = blob[0];
    if ver != VERSION {
        return Err(SymWireError::BadVersion(ver));
    }
    let nonce = XNonce::from_slice(&blob[1..1 + NONCE_LEN]);
    let ct = &blob[1 + NONCE_LEN..];

    let cipher = XChaCha20Poly1305::new(key);
    cipher.decrypt(nonce, ct).map_err(|_| SymWireError::Crypto)
}

// ---------- Command handlers (RESP) ----------

pub async fn cmd_sym_keygen() -> Protocol {
    let mut key_bytes = [0u8; 32];
    OsRng.fill_bytes(&mut key_bytes);
    let key_b64 = B64.encode(key_bytes);
    Protocol::BulkString(key_b64)
}

pub async fn cmd_sym_encrypt(key_b64: &str, message: &str) -> Protocol {
    let key = match decode_key_b64(key_b64) {
        Ok(k) => k,
        Err(e) => return e.to_protocol(),
    };
    match encrypt_blob(&key, message.as_bytes()) {
        Ok(blob) => Protocol::BulkString(B64.encode(blob)),
        Err(e) => e.to_protocol(),
    }
}

pub async fn cmd_sym_decrypt(key_b64: &str, ct_b64: &str) -> Protocol {
    let key = match decode_key_b64(key_b64) {
        Ok(k) => k,
        Err(e) => return e.to_protocol(),
    };
    let blob = match B64.decode(ct_b64.as_bytes()) {
        Ok(b) => b,
        Err(_) => return SymWireError::BadEncoding.to_protocol(),
    };
    match decrypt_blob(&key, &blob) {
        Ok(pt) => match String::from_utf8(pt) {
            Ok(s) => Protocol::BulkString(s),
            Err(_) => Protocol::err("ERR sym: invalid UTF-8 plaintext"),
        },
        Err(e) => e.to_protocol(),
    }
}
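Round-tripping through the three handlers shows the wire contract: every success reply is a Base64 bulk string, and each ciphertext blob carries a fixed 41 bytes of framing (1 version byte + 24-byte nonce + 16-byte Poly1305 tag) before Base64 expansion. A minimal sketch (an illustrative test, not part of this changeset), assuming `Protocol::BulkString` is the success variant as used above:

```rust
// Minimal round-trip sketch over the handlers above.
async fn sym_roundtrip() {
    let key_b64 = match cmd_sym_keygen().await {
        Protocol::BulkString(k) => k,
        _ => panic!("unexpected SYM KEYGEN reply"),
    };
    let ct_b64 = match cmd_sym_encrypt(&key_b64, "hello").await {
        Protocol::BulkString(ct) => ct,
        _ => panic!("unexpected SYM ENCRYPT reply"),
    };
    match cmd_sym_decrypt(&key_b64, &ct_b64).await {
        // Same key and intact blob: AEAD authentication passes, plaintext comes back.
        Protocol::BulkString(pt) => assert_eq!(pt, "hello"),
        _ => panic!("unexpected SYM DECRYPT reply"),
    }
}
```

Tampering with any byte of the blob, or decrypting with a different key, surfaces as `ERR sym: auth failed` rather than garbage plaintext, which is the point of using an AEAD here.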
667
src/tantivy_search.rs
Normal file
@@ -0,0 +1,667 @@
use crate::error::DBError;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use tantivy::{
    collector::TopDocs,
    directory::MmapDirectory,
    query::{BooleanQuery, Occur, Query, QueryParser, TermQuery},
    schema::{
        DateOptions, Field, IndexRecordOption, NumericOptions, Schema, TextFieldIndexing, TextOptions, STORED, STRING,
    },
    tokenizer::TokenizerManager,
    DateTime, Index, IndexReader, IndexWriter, TantivyDocument, Term,
};
use tantivy::schema::Value;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FieldDef {
    Text {
        stored: bool,
        indexed: bool,
        tokenized: bool,
        fast: bool,
    },
    Numeric {
        stored: bool,
        indexed: bool,
        fast: bool,
        precision: NumericType,
    },
    Tag {
        stored: bool,
        separator: String,
        case_sensitive: bool,
    },
    Geo {
        stored: bool,
    },
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NumericType {
    I64,
    U64,
    F64,
    Date,
}

pub struct IndexSchema {
    schema: Schema,
    fields: HashMap<String, (Field, FieldDef)>,
    default_search_fields: Vec<Field>,
}

pub struct TantivySearch {
    index: Index,
    writer: Arc<RwLock<IndexWriter>>,
    reader: IndexReader,
    index_schema: IndexSchema,
    name: String,
    config: IndexConfig,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
    pub language: String,
    pub stopwords: Vec<String>,
    pub stemming: bool,
    pub max_doc_count: Option<usize>,
    pub default_score: f64,
}

impl Default for IndexConfig {
    fn default() -> Self {
        IndexConfig {
            language: "english".to_string(),
            stopwords: vec![],
            stemming: true,
            max_doc_count: None,
            default_score: 1.0,
        }
    }
}

impl TantivySearch {
    pub fn new_with_schema(
        base_path: PathBuf,
        name: String,
        field_definitions: Vec<(String, FieldDef)>,
        config: Option<IndexConfig>,
    ) -> Result<Self, DBError> {
        let index_path = base_path.join(&name);
        std::fs::create_dir_all(&index_path)
            .map_err(|e| DBError(format!("Failed to create index dir: {}", e)))?;

        // Build schema from field definitions
        let mut schema_builder = Schema::builder();
        let mut fields = HashMap::new();
        let mut default_search_fields = Vec::new();

        // Always add a document ID field
        let id_field = schema_builder.add_text_field("_id", STRING | STORED);
        fields.insert(
            "_id".to_string(),
            (
                id_field,
                FieldDef::Text {
                    stored: true,
                    indexed: true,
                    tokenized: false,
                    fast: false,
                },
            ),
        );

        // Add user-defined fields
        for (field_name, field_def) in field_definitions {
            let field = match &field_def {
                FieldDef::Text {
                    stored,
                    indexed,
                    tokenized,
                    fast: _fast,
                } => {
                    let mut text_options = TextOptions::default();
                    if *stored {
                        text_options = text_options.set_stored();
                    }
                    if *indexed {
                        let indexing_options = if *tokenized {
                            TextFieldIndexing::default()
                                .set_tokenizer("default")
                                .set_index_option(IndexRecordOption::WithFreqsAndPositions)
                        } else {
                            TextFieldIndexing::default()
                                .set_tokenizer("raw")
                                .set_index_option(IndexRecordOption::Basic)
                        };
                        text_options = text_options.set_indexing_options(indexing_options);
                        let f = schema_builder.add_text_field(&field_name, text_options);
                        if *tokenized {
                            default_search_fields.push(f);
                        }
                        f
                    } else {
                        schema_builder.add_text_field(&field_name, text_options)
                    }
                }
                FieldDef::Numeric {
                    stored,
                    indexed,
                    fast,
                    precision,
                } => match precision {
                    NumericType::I64 => {
                        let mut opts = NumericOptions::default();
                        if *stored {
                            opts = opts.set_stored();
                        }
                        if *indexed {
                            opts = opts.set_indexed();
                        }
                        if *fast {
                            opts = opts.set_fast();
                        }
                        schema_builder.add_i64_field(&field_name, opts)
                    }
                    NumericType::U64 => {
                        let mut opts = NumericOptions::default();
                        if *stored {
                            opts = opts.set_stored();
                        }
                        if *indexed {
                            opts = opts.set_indexed();
                        }
                        if *fast {
                            opts = opts.set_fast();
                        }
                        schema_builder.add_u64_field(&field_name, opts)
                    }
                    NumericType::F64 => {
                        let mut opts = NumericOptions::default();
                        if *stored {
                            opts = opts.set_stored();
                        }
                        if *indexed {
                            opts = opts.set_indexed();
                        }
                        if *fast {
                            opts = opts.set_fast();
                        }
                        schema_builder.add_f64_field(&field_name, opts)
                    }
                    NumericType::Date => {
                        let mut opts = DateOptions::default();
                        if *stored {
                            opts = opts.set_stored();
                        }
                        if *indexed {
                            opts = opts.set_indexed();
                        }
                        if *fast {
                            opts = opts.set_fast();
                        }
                        schema_builder.add_date_field(&field_name, opts)
                    }
                },
                FieldDef::Tag {
                    stored,
                    separator: _,
                    case_sensitive: _,
                } => {
                    let mut text_options = TextOptions::default();
                    if *stored {
                        text_options = text_options.set_stored();
                    }
                    text_options = text_options.set_indexing_options(
                        TextFieldIndexing::default()
                            .set_tokenizer("raw")
                            .set_index_option(IndexRecordOption::Basic),
                    );
                    schema_builder.add_text_field(&field_name, text_options)
                }
                FieldDef::Geo { stored } => {
                    // For now, store as two f64 fields for lat/lon
                    let mut opts = NumericOptions::default();
                    if *stored {
                        opts = opts.set_stored();
                    }
                    opts = opts.set_indexed().set_fast();
                    let lat_field =
                        schema_builder.add_f64_field(&format!("{}_lat", field_name), opts.clone());
                    let lon_field =
                        schema_builder.add_f64_field(&format!("{}_lon", field_name), opts);
                    fields.insert(
                        format!("{}_lat", field_name),
                        (
                            lat_field,
                            FieldDef::Numeric {
                                stored: *stored,
                                indexed: true,
                                fast: true,
                                precision: NumericType::F64,
                            },
                        ),
                    );
                    fields.insert(
                        format!("{}_lon", field_name),
                        (
                            lon_field,
                            FieldDef::Numeric {
                                stored: *stored,
                                indexed: true,
                                fast: true,
                                precision: NumericType::F64,
                            },
                        ),
                    );
                    continue; // Skip adding the geo field itself
                }
            };
            fields.insert(field_name.clone(), (field, field_def));
        }

        let schema = schema_builder.build();
        let index_schema = IndexSchema {
            schema: schema.clone(),
            fields,
            default_search_fields,
        };

        // Create or open index
        let dir = MmapDirectory::open(&index_path)
            .map_err(|e| DBError(format!("Failed to open index directory: {}", e)))?;
        let mut index =
            Index::open_or_create(dir, schema).map_err(|e| DBError(format!("Failed to create index: {}", e)))?;

        // Configure tokenizers
        let tokenizer_manager = TokenizerManager::default();
        index.set_tokenizers(tokenizer_manager);

        let writer = index
            .writer(15_000_000)
            .map_err(|e| DBError(format!("Failed to create index writer: {}", e)))?;
        let reader = index
            .reader()
            .map_err(|e| DBError(format!("Failed to create reader: {}", e)))?;

        let config = config.unwrap_or_default();

        Ok(TantivySearch {
            index,
            writer: Arc::new(RwLock::new(writer)),
            reader,
            index_schema,
            name,
            config,
        })
    }

    pub fn add_document_with_fields(
        &self,
        doc_id: &str,
        fields: HashMap<String, String>,
    ) -> Result<(), DBError> {
        let mut writer = self
            .writer
            .write()
            .map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?;

        // Delete existing document with same ID
        if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
            writer.delete_term(Term::from_field_text(*id_field, doc_id));
        }

        // Create new document
        let mut doc = tantivy::doc!();

        // Add document ID
        if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
            doc.add_text(*id_field, doc_id);
        }

        // Add other fields based on schema
        for (field_name, field_value) in fields {
            if let Some((field, field_def)) = self.index_schema.fields.get(&field_name) {
                match field_def {
                    FieldDef::Text { .. } => {
                        doc.add_text(*field, &field_value);
                    }
                    FieldDef::Numeric { precision, .. } => match precision {
                        NumericType::I64 => {
                            if let Ok(v) = field_value.parse::<i64>() {
                                doc.add_i64(*field, v);
                            }
                        }
                        NumericType::U64 => {
                            if let Ok(v) = field_value.parse::<u64>() {
                                doc.add_u64(*field, v);
                            }
                        }
                        NumericType::F64 => {
                            if let Ok(v) = field_value.parse::<f64>() {
                                doc.add_f64(*field, v);
                            }
                        }
                        NumericType::Date => {
                            if let Ok(v) = field_value.parse::<i64>() {
                                doc.add_date(*field, DateTime::from_timestamp_millis(v));
                            }
                        }
                    },
                    FieldDef::Tag {
                        separator,
                        case_sensitive,
                        ..
                    } => {
                        let tags = if !case_sensitive {
                            field_value.to_lowercase()
                        } else {
                            field_value.clone()
                        };
                        for tag in tags.split(separator.as_str()) {
                            doc.add_text(*field, tag.trim());
                        }
                    }
                    FieldDef::Geo { .. } => {
                        let parts: Vec<&str> = field_value.split(',').collect();
                        if parts.len() == 2 {
                            if let (Ok(lat), Ok(lon)) =
                                (parts[0].parse::<f64>(), parts[1].parse::<f64>())
                            {
                                if let Some((lat_field, _)) =
                                    self.index_schema.fields.get(&format!("{}_lat", field_name))
                                {
                                    doc.add_f64(*lat_field, lat);
                                }
                                if let Some((lon_field, _)) =
                                    self.index_schema.fields.get(&format!("{}_lon", field_name))
                                {
                                    doc.add_f64(*lon_field, lon);
                                }
                            }
                        }
                    }
                }
            }
        }

        writer
            .add_document(doc)
            .map_err(|e| DBError(format!("Failed to add document: {}", e)))?;
        writer
            .commit()
            .map_err(|e| DBError(format!("Failed to commit: {}", e)))?;
        Ok(())
    }

    pub fn search_with_options(
        &self,
        query_str: &str,
        options: SearchOptions,
    ) -> Result<SearchResults, DBError> {
        let searcher = self.reader.searcher();

        // Ensure we have searchable fields
        if self.index_schema.default_search_fields.is_empty() {
            return Err(DBError("No searchable fields defined in schema".to_string()));
        }

        // Parse query based on search fields
        let query_parser = QueryParser::for_index(
            &self.index,
            self.index_schema.default_search_fields.clone(),
        );
        let parsed_query = query_parser
            .parse_query(query_str)
            .map_err(|e| DBError(format!("Failed to parse query: {}", e)))?;
        let mut clauses: Vec<(Occur, Box<dyn Query>)> = vec![(Occur::Must, parsed_query)];

        // Apply filters if any
        for filter in options.filters {
            if let Some((field, field_def)) = self.index_schema.fields.get(&filter.field) {
                match filter.filter_type {
                    FilterType::Equals(value) => {
                        match field_def {
                            FieldDef::Text { .. } | FieldDef::Tag { .. } => {
                                let term_query =
                                    TermQuery::new(Term::from_field_text(*field, &value), IndexRecordOption::Basic);
                                clauses.push((Occur::Must, Box::new(term_query)));
                            }
                            FieldDef::Numeric { precision, .. } => {
                                // Equals on numeric fields: parse to the right numeric type and use term query
                                match precision {
                                    NumericType::I64 => {
                                        if let Ok(v) = value.parse::<i64>() {
                                            let term = Term::from_field_i64(*field, v);
                                            let tq = TermQuery::new(term, IndexRecordOption::Basic);
                                            clauses.push((Occur::Must, Box::new(tq)));
                                        }
                                    }
                                    NumericType::U64 => {
                                        if let Ok(v) = value.parse::<u64>() {
                                            let term = Term::from_field_u64(*field, v);
                                            let tq = TermQuery::new(term, IndexRecordOption::Basic);
                                            clauses.push((Occur::Must, Box::new(tq)));
                                        }
                                    }
                                    NumericType::F64 => {
                                        if let Ok(v) = value.parse::<f64>() {
                                            let term = Term::from_field_f64(*field, v);
                                            let tq = TermQuery::new(term, IndexRecordOption::Basic);
                                            clauses.push((Occur::Must, Box::new(tq)));
                                        }
                                    }
                                    NumericType::Date => {
                                        if let Ok(v) = value.parse::<i64>() {
                                            let dt = DateTime::from_timestamp_millis(v);
                                            let term = Term::from_field_date(*field, dt);
                                            let tq = TermQuery::new(term, IndexRecordOption::Basic);
                                            clauses.push((Occur::Must, Box::new(tq)));
                                        }
                                    }
                                }
                            }
                            FieldDef::Geo { .. } => {
                                // Geo equals isn't supported in this simplified version
                            }
                        }
                    }
                    FilterType::Range { .. } => {
                        // TODO: Implement numeric range queries by building a RangeQuery per type
                    }
                    FilterType::InSet(values) => {
                        // OR across values
                        let mut sub_clauses: Vec<(Occur, Box<dyn Query>)> = vec![];
                        for value in values {
                            let term_query = TermQuery::new(
                                Term::from_field_text(*field, &value),
                                IndexRecordOption::Basic,
                            );
                            sub_clauses.push((Occur::Should, Box::new(term_query)));
                        }
                        clauses.push((Occur::Must, Box::new(BooleanQuery::new(sub_clauses))));
                    }
                }
            }
        }

        let final_query: Box<dyn Query> = if clauses.len() == 1 {
            clauses.pop().unwrap().1
        } else {
            Box::new(BooleanQuery::new(clauses))
        };

        // Execute search
        let top_docs = searcher
            .search(&*final_query, &TopDocs::with_limit(options.limit + options.offset))
            .map_err(|e| DBError(format!("Search failed: {}", e)))?;
        let total_hits = top_docs.len();
        let mut documents = Vec::new();

        for (score, doc_address) in top_docs.into_iter().skip(options.offset).take(options.limit) {
            let retrieved_doc: TantivyDocument = searcher
                .doc(doc_address)
                .map_err(|e| DBError(format!("Failed to retrieve doc: {}", e)))?;

            let mut doc_fields = HashMap::new();

            // Extract stored fields (or synthesize)
            for (field_name, (field, field_def)) in &self.index_schema.fields {
                match field_def {
                    FieldDef::Text { stored, .. } | FieldDef::Tag { stored, .. } => {
                        if *stored {
                            if let Some(value) = retrieved_doc.get_first(*field) {
                                if let Some(text) = value.as_str() {
                                    doc_fields.insert(field_name.clone(), text.to_string());
                                }
                            }
                        }
                    }
                    FieldDef::Numeric {
                        stored, precision, ..
                    } => {
                        if *stored {
                            let value_str = match precision {
                                NumericType::I64 => retrieved_doc
                                    .get_first(*field)
                                    .and_then(|v| v.as_i64())
                                    .map(|v| v.to_string()),
                                NumericType::U64 => retrieved_doc
                                    .get_first(*field)
                                    .and_then(|v| v.as_u64())
                                    .map(|v| v.to_string()),
                                NumericType::F64 => retrieved_doc
                                    .get_first(*field)
                                    .and_then(|v| v.as_f64())
                                    .map(|v| v.to_string()),
                                NumericType::Date => retrieved_doc
                                    .get_first(*field)
                                    .and_then(|v| v.as_datetime())
                                    .map(|v| v.into_timestamp_millis().to_string()),
                            };
                            if let Some(v) = value_str {
                                doc_fields.insert(field_name.clone(), v);
                            }
                        }
                    }
                    FieldDef::Geo { stored } => {
                        if *stored {
                            let lat_field = self
                                .index_schema
                                .fields
                                .get(&format!("{}_lat", field_name))
                                .unwrap()
                                .0;
                            let lon_field = self
                                .index_schema
                                .fields
                                .get(&format!("{}_lon", field_name))
                                .unwrap()
                                .0;
                            let lat = retrieved_doc.get_first(lat_field).and_then(|v| v.as_f64());
                            let lon = retrieved_doc.get_first(lon_field).and_then(|v| v.as_f64());
                            if let (Some(lat), Some(lon)) = (lat, lon) {
                                doc_fields.insert(field_name.clone(), format!("{},{}", lat, lon));
                            }
                        }
                    }
                }
            }

            documents.push(SearchDocument {
                fields: doc_fields,
                score,
            });
        }

        Ok(SearchResults {
            total: total_hits,
            documents,
        })
    }

    pub fn get_info(&self) -> Result<IndexInfo, DBError> {
        let searcher = self.reader.searcher();
        let num_docs = searcher.num_docs();
        let fields_info: Vec<FieldInfo> = self
            .index_schema
            .fields
            .iter()
            .map(|(name, (_, def))| FieldInfo {
                name: name.clone(),
                field_type: format!("{:?}", def),
            })
            .collect();
        Ok(IndexInfo {
            name: self.name.clone(),
            num_docs,
            fields: fields_info,
            config: self.config.clone(),
        })
    }
}

#[derive(Debug, Clone)]
pub struct SearchOptions {
    pub limit: usize,
    pub offset: usize,
    pub filters: Vec<Filter>,
    pub sort_by: Option<String>,
    pub return_fields: Option<Vec<String>>,
    pub highlight: bool,
}

impl Default for SearchOptions {
    fn default() -> Self {
        SearchOptions {
            limit: 10,
            offset: 0,
            filters: vec![],
            sort_by: None,
            return_fields: None,
            highlight: false,
        }
    }
}

#[derive(Debug, Clone)]
pub struct Filter {
    pub field: String,
    pub filter_type: FilterType,
}

#[derive(Debug, Clone)]
pub enum FilterType {
    Equals(String),
    Range { min: String, max: String },
    InSet(Vec<String>),
}

#[derive(Debug)]
pub struct SearchResults {
    pub total: usize,
    pub documents: Vec<SearchDocument>,
}

#[derive(Debug)]
pub struct SearchDocument {
    pub fields: HashMap<String, String>,
    pub score: f32,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct IndexInfo {
    pub name: String,
    pub num_docs: u64,
    pub fields: Vec<FieldInfo>,
    pub config: IndexConfig,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct FieldInfo {
    pub name: String,
    pub field_type: String,
}
@@ -28,6 +28,7 @@ async fn debug_hset_simple() {
         encrypt: false,
         encryption_key: None,
         backend: herodb::options::BackendType::Redb,
+        admin_secret: "test-admin".to_string(),
     };
 
     let mut server = Server::new(option).await;
@@ -48,6 +49,12 @@ async fn debug_hset_simple() {
     sleep(Duration::from_millis(200)).await;
 
     let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap();
+    // Acquire ReadWrite permissions on this connection
+    let resp = send_command(
+        &mut stream,
+        "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n",
+    ).await;
+    assert!(resp.contains("OK"), "Failed SELECT handshake: {}", resp);
 
     // Test simple HSET
     println!("Testing HSET...");
@@ -19,6 +19,7 @@ async fn debug_hset_return_value() {
         encrypt: false,
         encryption_key: None,
         backend: herodb::options::BackendType::Redb,
+        admin_secret: "test-admin".to_string(),
     };
 
     let mut server = Server::new(option).await;
@@ -40,12 +41,19 @@ async fn debug_hset_return_value() {
 
     // Connect and test HSET
     let mut stream = TcpStream::connect("127.0.0.1:16390").await.unwrap();
 
+    // Acquire ReadWrite permissions for this new connection
+    let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n";
+    stream.write_all(handshake.as_bytes()).await.unwrap();
+    let mut buffer = [0; 1024];
+    let n = stream.read(&mut buffer).await.unwrap();
+    let resp = String::from_utf8_lossy(&buffer[..n]);
+    assert!(resp.contains("OK"), "Failed SELECT handshake: {}", resp);
+
     // Send HSET command
     let cmd = "*4\r\n$4\r\nHSET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n";
     stream.write_all(cmd.as_bytes()).await.unwrap();
 
     let mut buffer = [0; 1024];
     let n = stream.read(&mut buffer).await.unwrap();
     let response = String::from_utf8_lossy(&buffer[..n]);
 
@@ -12,7 +12,15 @@ fn get_redis_connection(port: u16) -> Connection {
     match client.get_connection() {
         Ok(mut conn) => {
             if redis::cmd("PING").query::<String>(&mut conn).is_ok() {
-                return conn;
+                // Acquire ReadWrite permissions on this connection
+                let sel: RedisResult<String> = redis::cmd("SELECT")
+                    .arg(0)
+                    .arg("KEY")
+                    .arg("test-admin")
+                    .query(&mut conn);
+                if sel.is_ok() {
+                    return conn;
+                }
             }
         }
         Err(e) => {
@@ -78,6 +86,8 @@ fn setup_server() -> (ServerProcessGuard, u16) {
         "--port",
         &port.to_string(),
         "--debug",
+        "--admin-secret",
+        "test-admin",
     ])
     .spawn()
     .expect("Failed to start server process");
@@ -23,18 +23,29 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         encrypt: false,
         encryption_key: None,
         backend: herodb::options::BackendType::Redb,
+        admin_secret: "test-admin".to_string(),
     };
 
     let server = Server::new(option).await;
     (server, port)
 }
 
 // Helper function to connect to the test server
 async fn connect_to_server(port: u16) -> TcpStream {
     let mut attempts = 0;
     loop {
         match TcpStream::connect(format!("127.0.0.1:{}", port)).await {
-            Ok(stream) => return stream,
+            Ok(mut stream) => {
+                // Obtain ReadWrite permissions for this connection by selecting DB 0 with admin key
+                let resp = send_command(
+                    &mut stream,
+                    "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n",
+                ).await;
+                if !resp.contains("OK") {
+                    panic!("Failed to acquire write permissions via SELECT 0 KEY test-admin: {}", resp);
+                }
+                return stream;
+            }
             Err(_) if attempts < 10 => {
                 attempts += 1;
                 sleep(Duration::from_millis(100)).await;
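All of the hand-rolled handshake strings in these tests are plain RESP framing: an array header `*<argc>` followed by one `$<len>` bulk string per argument. A small helper makes that explicit (hypothetical; the tests inline the bytes instead):

```rust
// Builds a RESP command frame; resp_command(&["SELECT", "0", "KEY", "test-admin"])
// reproduces the handshake string above byte-for-byte.
fn resp_command(args: &[&str]) -> String {
    let mut out = format!("*{}\r\n", args.len());
    for arg in args {
        out.push_str(&format!("${}\r\n{}\r\n", arg.len(), arg));
    }
    out
}
```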
85
tests/rpc_tests.rs
Normal file
@@ -0,0 +1,85 @@
use herodb::rpc::{BackendType, DatabaseConfig};
use herodb::admin_meta;
use herodb::options::BackendType as OptionsBackendType;

#[tokio::test]
async fn test_rpc_server_basic() {
    // This test would require starting the RPC server in a separate thread
    // For now, we'll just test that the types compile correctly

    // Test serialization of types
    let backend = BackendType::Redb;
    let config = DatabaseConfig {
        name: Some("test_db".to_string()),
        storage_path: Some("/tmp/test".to_string()),
        max_size: Some(1024 * 1024),
        redis_version: Some("7.0".to_string()),
    };

    let backend_json = serde_json::to_string(&backend).unwrap();
    let config_json = serde_json::to_string(&config).unwrap();

    assert_eq!(backend_json, "\"Redb\"");
    assert!(config_json.contains("test_db"));
}

#[tokio::test]
async fn test_database_config_serialization() {
    let config = DatabaseConfig {
        name: Some("my_db".to_string()),
        storage_path: None,
        max_size: Some(1000000),
        redis_version: Some("7.0".to_string()),
    };

    let json = serde_json::to_value(&config).unwrap();
    assert_eq!(json["name"], "my_db");
    assert_eq!(json["max_size"], 1000000);
    assert_eq!(json["redis_version"], "7.0");
}

#[tokio::test]
async fn test_backend_type_serialization() {
    // Test that both Redb and Sled backends serialize correctly
    let redb_backend = BackendType::Redb;
    let sled_backend = BackendType::Sled;

    let redb_json = serde_json::to_string(&redb_backend).unwrap();
    let sled_json = serde_json::to_string(&sled_backend).unwrap();

    assert_eq!(redb_json, "\"Redb\"");
    assert_eq!(sled_json, "\"Sled\"");

    // Test deserialization
    let redb_deserialized: BackendType = serde_json::from_str(&redb_json).unwrap();
    let sled_deserialized: BackendType = serde_json::from_str(&sled_json).unwrap();

    assert!(matches!(redb_deserialized, BackendType::Redb));
    assert!(matches!(sled_deserialized, BackendType::Sled));
}

#[tokio::test]
async fn test_database_name_persistence() {
    let base_dir = "/tmp/test_db_name_persistence";
    let admin_secret = "test-admin-secret";
    let backend = OptionsBackendType::Redb;
    let db_id = 1;
    let test_name = "test-database-name";

    // Clean up any existing test data
    let _ = std::fs::remove_dir_all(base_dir);

    // Set the database name
    admin_meta::set_database_name(base_dir, backend.clone(), admin_secret, db_id, test_name)
        .expect("Failed to set database name");

    // Retrieve the database name
    let retrieved_name = admin_meta::get_database_name(base_dir, backend, admin_secret, db_id)
        .expect("Failed to get database name");

    // Verify the name matches
    assert_eq!(retrieved_name, Some(test_name.to_string()));

    // Clean up
    let _ = std::fs::remove_dir_all(base_dir);
}
@@ -25,6 +25,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         encrypt: false,
         encryption_key: None,
         backend: herodb::options::BackendType::Redb,
+        admin_secret: "test-admin".to_string(),
     };
 
     let server = Server::new(option).await;
@@ -34,9 +35,16 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
 // Helper function to send Redis command and get response
 async fn send_redis_command(port: u16, command: &str) -> String {
     let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap();
 
+    // Acquire ReadWrite permissions on this new connection
+    let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n";
+    stream.write_all(handshake.as_bytes()).await.unwrap();
+    let mut buffer = [0; 1024];
+    let _ = stream.read(&mut buffer).await.unwrap(); // Read and ignore the OK for handshake
+
+    // Now send the intended command
     stream.write_all(command.as_bytes()).await.unwrap();
 
     let mut buffer = [0; 1024];
     let n = stream.read(&mut buffer).await.unwrap();
     String::from_utf8_lossy(&buffer[..n]).to_string()
 }
@@ -184,12 +192,19 @@ async fn test_transaction_operations() {
 
     sleep(Duration::from_millis(100)).await;
 
     // Use a single connection for the transaction
     let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap();
 
+    // Acquire write permissions for this connection
+    let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n";
+    stream.write_all(handshake.as_bytes()).await.unwrap();
+    let mut buffer = [0; 1024];
+    let n = stream.read(&mut buffer).await.unwrap();
+    let resp = String::from_utf8_lossy(&buffer[..n]);
+    assert!(resp.contains("OK"));
+
     // Test MULTI
     stream.write_all("*1\r\n$5\r\nMULTI\r\n".as_bytes()).await.unwrap();
     let mut buffer = [0; 1024];
     let n = stream.read(&mut buffer).await.unwrap();
     let response = String::from_utf8_lossy(&buffer[..n]);
     assert!(response.contains("OK"));
@@ -23,6 +23,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         encrypt: false,
         encryption_key: None,
         backend: herodb::options::BackendType::Redb,
+        admin_secret: "test-admin".to_string(),
     };
 
     let server = Server::new(option).await;
@@ -38,12 +39,22 @@ async fn send_command(stream: &mut TcpStream, command: &str) -> String {
     String::from_utf8_lossy(&buffer[..n]).to_string()
 }
 
 // Helper function to connect to the test server
 async fn connect_to_server(port: u16) -> TcpStream {
     let mut attempts = 0;
     loop {
         match TcpStream::connect(format!("127.0.0.1:{}", port)).await {
-            Ok(stream) => return stream,
+            Ok(mut stream) => {
+                // Acquire ReadWrite permissions for this connection
+                let resp = send_command(
+                    &mut stream,
+                    "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n",
+                ).await;
+                if !resp.contains("OK") {
+                    panic!("Failed to acquire write permissions via SELECT 0 KEY test-admin: {}", resp);
+                }
+                return stream;
+            }
             Err(_) if attempts < 10 => {
                 attempts += 1;
                 sleep(Duration::from_millis(100)).await;
@@ -97,14 +108,21 @@ async fn test_hset_clean_db() {
     sleep(Duration::from_millis(200)).await;
 
     let mut stream = connect_to_server(port).await;
 
-    // Test HSET - should return 1 for new field
-    let response = send_command(&mut stream, "*4\r\n$4\r\nHSET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n").await;
+    // Ensure clean DB state (admin DB 0 may be shared due to global singleton)
+    let flush = send_command(&mut stream, "*1\r\n$7\r\nFLUSHDB\r\n").await;
+    assert!(flush.contains("OK"), "Failed to FLUSHDB: {}", flush);
+
+    // Test HSET - should return 1 for new field (use a unique key name to avoid collisions)
+    let key = "hash_clean";
+    let hset_cmd = format!("*4\r\n$4\r\nHSET\r\n${}\r\n{}\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n", key.len(), key);
+    let response = send_command(&mut stream, &hset_cmd).await;
     println!("HSET response: {}", response);
     assert!(response.contains("1"), "Expected HSET to return 1, got: {}", response);
 
     // Test HGET
-    let response = send_command(&mut stream, "*3\r\n$4\r\nHGET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n").await;
+    let hget_cmd = format!("*3\r\n$4\r\nHGET\r\n${}\r\n{}\r\n$6\r\nfield1\r\n", key.len(), key);
+    let response = send_command(&mut stream, &hget_cmd).await;
     println!("HGET response: {}", response);
     assert!(response.contains("value1"));
 }
@@ -23,6 +23,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
         encrypt: false,
         encryption_key: None,
         backend: herodb::options::BackendType::Redb,
+        admin_secret: "test-admin".to_string(),
     };
 
     let server = Server::new(option).await;
@@ -61,7 +62,17 @@ async fn connect(port: u16) -> TcpStream {
     let mut attempts = 0;
     loop {
         match TcpStream::connect(format!("127.0.0.1:{}", port)).await {
-            Ok(s) => return s,
+            Ok(mut s) => {
+                // Acquire ReadWrite permissions for this connection using admin DB 0
+                let resp = send_cmd(&mut s, &["SELECT", "0", "KEY", "test-admin"]).await;
+                assert_contains(&resp, "OK", "SELECT 0 KEY test-admin handshake");
+
+                // Ensure clean slate per test on DB 0
+                let fl = send_cmd(&mut s, &["FLUSHDB"]).await;
+                assert_contains(&fl, "OK", "FLUSHDB after handshake");
+
+                return s;
+            }
             Err(_) if attempts < 30 => {
                 attempts += 1;
                 sleep(Duration::from_millis(100)).await;
@@ -246,9 +257,9 @@ async fn test_01_connection_and_info() {
     let getname = send_cmd(&mut s, &["CLIENT", "GETNAME"]).await;
     assert_contains(&getname, "myapp", "CLIENT GETNAME");
 
-    // SELECT db
-    let sel = send_cmd(&mut s, &["SELECT", "0"]).await;
-    assert_contains(&sel, "OK", "SELECT 0");
+    // SELECT db (requires key on DB 0)
+    let sel = send_cmd(&mut s, &["SELECT", "0", "KEY", "test-admin"]).await;
+    assert_contains(&sel, "OK", "SELECT 0 with key");
 
     // QUIT should close connection after sending OK
     let quit = send_cmd(&mut s, &["QUIT"]).await;
@@ -279,7 +290,11 @@ async fn test_02_strings_and_expiry() {
 
     let ex0 = send_cmd(&mut s, &["EXISTS", "user:1"]).await;
     assert_contains(&ex0, "0", "EXISTS after DEL");
 
+    // DEL non-existent should return 0
+    let del0 = send_cmd(&mut s, &["DEL", "user:1"]).await;
+    assert_contains(&del0, "0", "DEL user:1 when not exists -> 0");
+
     // INCR behavior
     let i1 = send_cmd(&mut s, &["INCR", "count"]).await;
     assert_contains(&i1, "1", "INCR new key -> 1");
@@ -501,11 +516,11 @@ async fn test_07_age_stateless_suite() {
     let mut s = connect(port).await;
 
     // GENENC -> [recipient, identity]
-    let gen = send_cmd(&mut s, &["AGE", "GENENC"]).await;
+    let genenc = send_cmd(&mut s, &["AGE", "GENENC"]).await;
     assert!(
-        gen.starts_with("*2\r\n$"),
+        genenc.starts_with("*2\r\n$"),
        "AGE GENENC should return array [recipient, identity], got:\n{}",
-        gen
+        genenc
     );
 
     // Parse simple RESP array of two bulk strings to extract keys
@@ -520,7 +535,7 @@ async fn test_07_age_stateless_suite() {
         let ident = lines.next().unwrap_or("").to_string();
         (recip, ident)
     }
-    let (recipient, identity) = parse_two_bulk_array(&gen);
+    let (recipient, identity) = parse_two_bulk_array(&genenc);
     assert!(
         recipient.starts_with("age1") && identity.starts_with("AGE-SECRET-KEY-1"),
         "Unexpected AGE key formats.\nrecipient: {}\nidentity: {}",
@@ -591,7 +606,7 @@ async fn test_08_age_persistent_named_suite() {
 
     // AGE LIST
     let lst = send_cmd(&mut s, &["AGE", "LIST"]).await;
-    assert_contains(&lst, "encpub", "AGE LIST label encpub");
+    // After flattening, LIST returns a flat array of managed key names
+    assert_contains(&lst, "app1", "AGE LIST includes app1");
 }