38 Commits

Author SHA1 Message Date
Maxime Van Hees
22ac4c9ed6 implementation of tantivy datastore + updated RPC calls to deal with tantivy + docs 2025-09-23 17:15:40 +02:00
Maxime Van Hees
c470772a13 Merge branch 'management_rpc_server' 2025-09-22 16:26:53 +02:00
Maxime Van Hees
bd34fd092a Persist backend per database id in admin metadata so restarts and lazy opens always use the correct engine (Sled/Redb) 2025-09-22 15:29:58 +02:00
Maxime Van Hees
8e044a64b7 fix incorrect keycount displayed in database info over RPC calls 2025-09-19 14:04:03 +02:00
Maxime Van Hees
87177f4a07 update documentation about 0.db admin db + symmetric encryption + include RPC examples + asymmetric transpart named key instances for encryption and signatures 2025-09-19 11:55:28 +02:00
Maxime Van Hees
151a6ffbfa fixed test 2025-09-19 10:35:08 +02:00
Maxime Van Hees
8ab841f68c Key generation now automatically derives X25519 keys from Ed25519 keys which allows user to transparantly use their key name for encrypting/decrypting and signing/verifying 2025-09-18 22:37:19 +02:00
Maxime Van Hees
8808c0e9d9 Implemented symmetric encryption; new commands are SYM KEYGEN; SYM ENCRYPT; SYM DECRYPT 2025-09-18 11:59:44 +02:00
Maxime Van Hees
c6b277cc9c fixed DEL showing wrong deletion amount + AGE LIST now returns a list of managed keys names without nested arrays or labels 2025-09-18 00:19:40 +02:00
8331ed032b ... 2025-09-17 07:02:44 +02:00
Maxime Van Hees
b8ca73397d implemented 0.db as admin database architecture + updated test file 2025-09-16 16:06:47 +02:00
Maxime Van Hees
1b15806a85 fix invalid values in RPC response about database instance details 2025-09-15 13:45:37 +02:00
Maxime Van Hees
da325a9659 fix bug where meta files where not auto-created upon starting + fix bug where meta json files were actually binary + improved access control to database instances 2025-09-15 10:34:03 +02:00
Maxime Van Hees
bdf363016a WIP: adding access management control to db instances 2025-09-12 17:11:50 +02:00
Maxime Van Hees
8798bc202e Restore working code 2025-09-11 18:33:09 +02:00
Maxime Van Hees
9fa9832605 combined curret main (with sled) and RPC server 2025-09-11 17:23:46 +02:00
Maxime Van Hees
4bb24b38dd fix typo in README 2025-09-11 15:34:03 +02:00
Maxime Van Hees
f3da14b957 Merge branch 'append' 2025-09-11 15:31:47 +02:00
Maxime Van Hees
5ea34b4445 update variable name as 'gen' is a reserved keyword since Rust 2024 edition 2025-09-11 15:25:26 +02:00
Maxime Van Hees
d9a3b711d1 Update tot Rust 2024 edition + update Cargo.toml file 2025-09-11 15:24:28 +02:00
Maxime Van Hees
d931770e90 Fix test suite + update Cargo.toml file 2025-09-09 16:04:31 +02:00
Timur Gordon
a87ec4dbb5 add readme 2025-08-27 15:39:59 +02:00
a1127b72da ... 2025-08-23 05:20:42 +02:00
3850df89be ... 2025-08-23 05:15:45 +02:00
45195d403e ... 2025-08-23 05:12:17 +02:00
f17b441ca1 ... 2025-08-23 05:07:45 +02:00
ff4ea1d844 ... 2025-08-23 05:04:37 +02:00
c9e1dcdb6c ... 2025-08-23 04:57:47 +02:00
56699b9abb Merge branch 'main' into append
* main:
  fixed test not running due to misaligned path
2025-08-22 17:24:12 +02:00
dd90a49615 sled backend 2025-08-22 17:20:44 +02:00
9054737e84 ... 2025-08-22 17:09:08 +02:00
09553f54c8 ... 2025-08-22 16:56:33 +02:00
Maxime Van Hees
58cb1e8d5e fixed test not running due to misaligned path 2025-08-22 16:54:43 +02:00
d3d92819cf ... 2025-08-22 16:26:04 +02:00
4fd48f8b0d ... 2025-08-22 16:16:26 +02:00
4bedf71c2d Update herodb/instructions/age_usage.md 2025-08-22 14:02:58 +00:00
b9987a027b Merge pull request 'clean workspace: remove supervisor packe + add instructions about how AGE redis commands work' (#2) from blpop into main
Reviewed-on: #2
2025-08-22 12:25:59 +00:00
f22a25f5a1 Merge pull request 'BLPOP + COMMAND + MGET/MSET + DEL/EXISTS + EXPIRE/PEXPIRE/PERSIST + HINCRBY/HINCRBYFLOAT + BRPOP + DBSIZE + EXPIREAT/PEXIREAT implementations' (#1) from blpop into main
Reviewed-on: #1
2025-08-22 11:41:37 +00:00
67 changed files with 18215 additions and 877 deletions

1841
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,9 +1,32 @@
[workspace]
members = ["herodb"]
resolver = "2"
[package]
name = "herodb"
version = "0.0.1"
authors = ["ThreeFold Tech NV"]
edition = "2024"
# You can define shared profiles for all workspace members here
[profile.release]
lto = true
codegen-units = 1
strip = true
[dependencies]
anyhow = "1.0.59"
bytes = "1.3.0"
thiserror = "1.0.32"
tokio = { version = "1.23.0", features = ["full"] }
clap = { version = "4.5.20", features = ["derive"] }
byteorder = "1.4.3"
futures = "0.3"
sled = "0.34"
redb = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
bincode = "1.3"
chacha20poly1305 = "0.10.1"
rand = "0.8"
sha2 = "0.10"
age = "0.10"
secrecy = "0.8"
ed25519-dalek = "2"
x25519-dalek = "2"
base64 = "0.22"
jsonrpsee = { version = "0.26.0", features = ["http-client", "ws-client", "server", "macros"] }
tantivy = "0.25.0"
[dev-dependencies]
redis = { version = "0.24", features = ["aio", "tokio-comp"] }

84
README.md Normal file
View File

@@ -0,0 +1,84 @@
# HeroDB
HeroDB is a Redis-compatible database built with Rust, offering a flexible and secure storage solution. It supports two primary storage backends: `redb` (default) and `sled`, both with full encryption capabilities. HeroDB aims to provide a robust and performant key-value store with advanced features like data-at-rest encryption, hash operations, list operations, and cursor-based scanning.
## Purpose
The main purpose of HeroDB is to offer a lightweight, embeddable, and Redis-compatible database that prioritizes data security through transparent encryption. It's designed for applications that require fast, reliable data storage with the option for strong cryptographic protection, without the overhead of a full-fledged Redis server.
## Features
- **Redis Compatibility**: Supports a subset of Redis commands over RESP (Redis Serialization Protocol) via TCP.
- **Dual Backend Support**:
- `redb` (default): Optimized for concurrent access and high-throughput scenarios.
- `sled`: A lock-free, log-structured database, excellent for specific workloads.
- **Data-at-Rest Encryption**: Transparent encryption for both backends using the `age` encryption library.
- **Key-Value Operations**: Full support for basic string, hash, and list operations.
- **Expiration**: Time-to-live (TTL) functionality for keys.
- **Scanning**: Cursor-based iteration for keys and hash fields (`SCAN`, `HSCAN`).
- **AGE Cryptography Commands**: HeroDB-specific extensions for cryptographic operations.
- **Symmetric Encryption**: Stateless symmetric encryption using XChaCha20-Poly1305.
- **Admin Database 0**: Centralized control for database management, access control, and per-database encryption.
## Quick Start
### Building HeroDB
To build HeroDB, navigate to the project root and run:
```bash
cargo build --release
```
### Running HeroDB
Launch HeroDB with the required `--admin-secret` flag, which encrypts the admin database (DB 0) and authorizes admin access. Optional flags include `--dir` for the database directory, `--port` for the TCP port (default 6379), `--sled` for the sled backend, and `--enable-rpc` to start the JSON-RPC management server on port 8080.
Example:
```bash
./target/release/herodb --dir /tmp/herodb --admin-secret myadminsecret --port 6379 --enable-rpc
```
For detailed launch options, see [Basics](docs/basics.md).
## Usage with Redis Clients
HeroDB can be interacted with using any standard Redis client, such as `redis-cli`, `redis-py` (Python), or `ioredis` (Node.js).
### Example with `redis-cli`
```bash
redis-cli -p 6379 SET mykey "Hello from HeroDB!"
redis-cli -p 6379 GET mykey
# → "Hello from HeroDB!"
redis-cli -p 6379 HSET user:1 name "Alice" age "30"
redis-cli -p 6379 HGET user:1 name
# → "Alice"
redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10
# → 1) "0"
# 2) 1) "user:1"
```
## Cryptography
HeroDB supports asymmetric encryption/signatures via AGE commands (X25519 for encryption, Ed25519 for signatures) in stateless or key-managed modes, and symmetric encryption via SYM commands. Keys are persisted in the admin database (DB 0) for managed modes.
For details, see [AGE Cryptography](docs/age.md) and [Basics](docs/basics.md).
## Database Management
Databases are managed via JSON-RPC API, with metadata stored in the encrypted admin database (DB 0). Databases are public by default upon creation; use RPC to set them private, requiring access keys for SELECT operations (read or readwrite based on permissions). This includes per-database encryption keys, access control, and lifecycle management.
For examples, see [JSON-RPC Examples](docs/rpc_examples.md) and [Admin DB 0 Model](docs/admin.md).
## Documentation
For more detailed information on commands, features, and advanced usage, please refer to the documentation:
- [Basics](docs/basics.md)
- [Supported Commands](docs/cmds.md)
- [AGE Cryptography](docs/age.md)
- [Admin DB 0 Model (access control, per-db encryption)](docs/admin.md)
- [JSON-RPC Examples (management API)](docs/rpc_examples.md)

181
docs/admin.md Normal file
View File

@@ -0,0 +1,181 @@
# Admin Database 0 (`0.db`)
This page explains what the Admin Database `DB 0` is, why HeroDB uses it, and how to work with it as a developer and end-user. Its a practical guide covering how databases are created, listed, secured with access keys, and encrypted using per-database secrets.
## What is `DB 0`?
`DB 0` is the control-plane for a HeroDB instance. It stores metadata for all user databases (`db_id >= 1`) so the server can:
- Know which databases exist (without scanning the filesystem)
- Enforce access control (public/private with access keys)
- Enforce per-database encryption (whether a given database must be opened encrypted and with which write-only key)
`DB 0` itself is always encrypted with the admin secret (the process-level secret provided at startup).
## How `DB 0` is created and secured
- `DB 0` lives at `<base_dir>/0.db`
- It is always encrypted using the `admin secret` provided at process startup (using the `--admin-secret <secret>` CLI flag)
- Only clients that provide the correct admin secret can `SELECT 0` (see “`SELECT` + `KEY`” below)
At startup, the server bootstraps `DB 0` (initializes counters and structures) if its missing.
## Metadata stored in `DB 0`
Keys in `DB 0` (internal layout, but useful to understand how things work):
- `admin:next_id`
- String counter holding the next id to allocate (initialized to `"1"`)
- `admin:dbs`
- A hash acting as a set of existing database ids
- field = id (as string), value = `"1"`
- `meta:db:<id>`
- A hash holding db-level metadata
- field `public` = `"true"` or `"false"` (defaults to `true` if missing)
- `meta:db:<id>:keys`
- A hash mapping access-key hashes to the string `Permission:created_at_seconds`
- Examples: `Read:1713456789` or `ReadWrite:1713456789`
- The plaintext access keys are never stored; only their `SHA-256` hashes are kept
- `meta:db:<id>:enc`
- A string holding the per-database encryption key used to open `<id>.db` encrypted
- This value is write-only from the perspective of the management APIs (its set at creation and never returned)
- `age:key:<name>`
- Base64-encoded X25519 recipient (public encryption key) for named AGE keys
- `age:privkey:<name>`
- Base64-encoded X25519 identity (secret encryption key) for named AGE keys
- `age:signpub:<name>`
- Base64-encoded Ed25519 verify public key for named AGE keys
- `age:signpriv:<name>`
- Base64-encoded Ed25519 signing secret key for named AGE keys
> You dont need to manipulate these keys directly; theyre listed to clarify the model. AGE keys are managed via AGE commands.
## Database lifecycle
1) Create a database (via JSON-RPC)
- The server allocates an id from `admin:next_id`, registers it in `admin:dbs`, and defaults the database to `public=true`
- If you pass an optional `encryption_key` during creation, the server persists it in `meta:db:<id>:enc`. That database will be opened in encrypted mode from then on
2) Open and use a database
- Clients select a database over RESP using `SELECT`
- Authorization and encryption state are enforced using `DB 0` metadata
3) Delete database files
- Removing `<id>.db` removes the physical storage
- `DB 0` remains the source of truth for existence and may be updated by future management methods as the system evolves
## Access control model
- Public database (default)
- Anyone can `SELECT <id>` with no key, and will get `ReadWrite` permission
- Private database
- You must provide an access key when selecting the database
- The server hashes the provided key with `SHA-256` and checks membership in `meta:db:<id>:keys`
- Permissions are `Read` or `ReadWrite` depending on how the key was added
- Admin `DB 0`
- Requires the exact admin secret as the `KEY` argument to `SELECT 0`
- Permission is `ReadWrite` when the secret matches
### How to select databases with optional `KEY`
- Public DB (no key required)
- `SELECT <id>`
- Private DB (access key required)
- `SELECT <id> KEY <plaintext_key>`
- Admin `DB 0` (admin secret required)
- `SELECT 0 KEY <admin_secret>`
Examples (using `redis-cli`):
```bash
# Public database
redis-cli -p $PORT SELECT 1
# → OK
# Private database
redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
# → OK
# Admin DB 0
redis-cli -p $PORT SELECT 0 KEY my-admin-secret
# → OK
```
## Per-database encryption
- At database creation, you can provide an optional per-db encryption key
- If provided, the server persists that key in `DB 0` as `meta:db:<id>:enc`
- When you later open the database, the engine checks whether `meta:db:<id>:enc` exists to decide if it must open `<id>.db` in encrypted mode
- The per-db key is not returned by RPC—it is considered write-only configuration data
Operationally:
- Create with encryption: pass a non-null `encryption_key` to the `createDatabase` RPC
- Open later: simply `SELECT` the database; encryption is transparent to clients
## Management via JSON-RPC
You can manage databases using the management RPC (namespaced `herodb.*`). Typical operations:
- `createDatabase(backend, config, encryption_key?)`
- Allocates a new id, sets optional encryption key
- `listDatabases()`
- Lists database ids and info (including whether storage is currently encrypted)
- `getDatabaseInfo(db_id)`
- Returns details: backend, encrypted flag, size on disk, `key_count`, timestamps, etc.
- `addAccessKey(db_id, key, permissions)`
- Adds a `Read` or `ReadWrite` access key (permissions = `"read"` | `"readwrite"`)
- `listAccessKeys(db_id)`
- Returns hashes and permissions; you can use these hashes to delete keys
- `deleteAccessKey(db_id, key_hash)`
- Removes a key by its hash
- `setDatabasePublic(db_id, public)`
- Toggles public/private
Copyable JSON examples are provided in the [RPC examples documentation](./rpc_examples.md).
## Typical flows
1) Public, unencrypted database
- Create a new database without an encryption key
- Clients can immediately `SELECT <id>` without a key
- You can later make it private and add keys if needed
2) Private, encrypted database
- Create passing an `encryption_key`
- Mark it private (`setDatabasePublic false`) and add access keys
- Clients must use `SELECT <id> KEY <plaintext_access_key>`
- Storage opens in encrypted mode automatically
## Security notes
- Only `SHA-256` hashes of access keys are stored in `DB 0`; keep plaintext keys safe on the client side
- The per-db encryption key is never exposed via the API after it is set
- The admin secret must be kept secure; anyone with it can `SELECT 0` and perform administrative actions
## Troubleshooting
- `ERR invalid access key` when selecting a private db
- Ensure you passed the `KEY` argument: `SELECT <id> KEY <plaintext_key>`
- If you recently added the key, confirm the permissions and that you used the exact plaintext (hash must match)
- `Database X not found`
- The id isnt registered in `DB 0` (`admin:dbs`). Use the management APIs to create or list databases
- Cannot `SELECT 0`
- The `KEY` must be the exact admin secret passed at server startup
## Reference
- Admin metadata lives in `DB 0` (`0.db`) and controls:
- Existence: `admin:dbs`
- Access: `meta:db:<id>.public` and `meta:db:<id>:keys`
- Encryption: `meta:db:<id>:enc`
For command examples and management payloads:
- RESP command basics: `docs/basics.md`
- Supported commands: `docs/cmds.md`
- JSON-RPC examples: `docs/rpc_examples.md`

96
docs/age.md Normal file
View File

@@ -0,0 +1,96 @@
# HeroDB AGE Cryptography
HeroDB provides AGE-based asymmetric encryption and digital signatures over the Redis protocol using X25519 for encryption and Ed25519 for signatures. Keys can be used in stateless (ephemeral) or key-managed (persistent, named) modes.
In key-managed mode, HeroDB uses a unified keypair concept: a single Ed25519 signing key is deterministically derived into X25519 keys for encryption, allowing one keypair to handle both encryption and signatures transparently.
## Cryptographic Algorithms
### X25519 (Encryption)
- Elliptic-curve Diffie-Hellman key exchange for symmetric key derivation.
- Used for encrypting/decrypting messages.
### Ed25519 (Signatures)
- EdDSA digital signatures for message authentication.
- Used for signing/verifying messages.
### Key Derivation
Ed25519 signing keys are deterministically converted to X25519 keys for encryption. This enables a single keypair to support both operations without additional keys. Derivation uses the Ed25519 secret scalar clamped for X25519.
In named keypairs, Ed25519 keys are stored, and X25519 keys are derived on-demand and cached.
## Stateless Mode (Ephemeral Keys)
No server-side storage; keys are provided with each command.
Available commands:
- `AGE GENENC`: Generate ephemeral X25519 keypair. Returns `[recipient, identity]`.
- `AGE GENSIGN`: Generate ephemeral Ed25519 keypair. Returns `[verify_pub, sign_secret]`.
- `AGE ENCRYPT <recipient> <message>`: Encrypt message. Returns base64 ciphertext.
- `AGE DECRYPT <identity> <ciphertext_b64>`: Decrypt ciphertext. Returns plaintext.
- `AGE SIGN <sign_secret> <message>`: Sign message. Returns base64 signature.
- `AGE VERIFY <verify_pub> <message> <signature_b64>`: Verify signature. Returns 1 (valid) or 0 (invalid).
Example:
```bash
redis-cli AGE GENENC
# → 1) "age1qz..." # recipient (X25519 public)
# 2) "AGE-SECRET-KEY-1..." # identity (X25519 secret)
redis-cli AGE ENCRYPT "age1qz..." "hello"
# → base64_ciphertext
redis-cli AGE DECRYPT "AGE-SECRET-KEY-1..." base64_ciphertext
# → "hello"
```
## Key-Managed Mode (Persistent Named Keys)
Keys are stored server-side under names. Supports unified keypairs for both encryption and signatures.
Available commands:
- `AGE KEYGEN <name>`: Generate and store unified keypair. Returns `[recipient, identity]` in age format.
- `AGE SIGNKEYGEN <name>`: Generate and store Ed25519 signing keypair. Returns `[verify_pub, sign_secret]`.
- `AGE ENCRYPTNAME <name> <message>`: Encrypt with named key. Returns base64 ciphertext.
- `AGE DECRYPTNAME <name> <ciphertext_b64>`: Decrypt with named key. Returns plaintext.
- `AGE SIGNNAME <name> <message>`: Sign with named key. Returns base64 signature.
- `AGE VERIFYNAME <name> <message> <signature_b64>`: Verify with named key. Returns 1 or 0.
- `AGE LIST`: List all stored key names. Returns sorted array of names.
### AGE LIST Output
Returns a flat, deduplicated, sorted array of key names (strings). Each name corresponds to a stored keypair, which may include encryption keys (X25519), signing keys (Ed25519), or both.
Output format: `["name1", "name2", ...]`
Example:
```bash
redis-cli AGE LIST
# → 1) "<named_keypair_1>"
# 2) "<named_keypair_2>"
```
For unified keypairs (from `AGE KEYGEN`), the name handles both encryption (derived X25519) and signatures (stored Ed25519) transparently.
Example with named keys:
```bash
redis-cli AGE KEYGEN app1
# → 1) "age1..." # recipient
# 2) "AGE-SECRET-KEY-1..." # identity
redis-cli AGE ENCRYPTNAME app1 "secret message"
# → base64_ciphertext
redis-cli AGE DECRYPTNAME app1 base64_ciphertext
# → "secret message"
redis-cli AGE SIGNNAME app1 "message"
# → base64_signature
redis-cli AGE VERIFYNAME app1 "message" base64_signature
# → 1
```
## Choosing a Mode
- **Stateless**: For ad-hoc operations without persistence; client manages keys.
- **Key-managed**: For centralized key lifecycle; server stores keys for convenience and discoverability.
Implementation: [herodb/src/age.rs](herodb/src/age.rs) <br>
Tests: [herodb/tests/usage_suite.rs](herodb/tests/usage_suite.rs)

724
docs/basics.md Normal file
View File

@@ -0,0 +1,724 @@
# HeroDB Basics
## Launching HeroDB
To launch HeroDB, use the binary with required and optional flags. The `--admin-secret` flag is mandatory, encrypting the admin database (DB 0) and authorizing admin access.
### Launch Flags
- `--dir <path>`: Directory for database files (default: current directory).
- `--port <port>`: TCP port for Redis protocol (default: 6379).
- `--debug`: Enable debug logging.
- `--sled`: Use Sled backend (default: Redb).
- `--enable-rpc`: Start JSON-RPC management server on port 8080.
- `--rpc-port <port>`: Custom RPC port (default: 8080).
- `--admin-secret <secret>`: Required secret for DB 0 encryption and admin access.
Example:
```bash
./target/release/herodb --dir /tmp/herodb --admin-secret mysecret --port 6379 --enable-rpc
```
Deprecated flags (`--encrypt`, `--encryption-key`) are ignored for data DBs; per-database encryption is managed via RPC.
## Admin Database (DB 0)
DB 0 acts as the administrative database instance, storing metadata for all user databases (IDs >= 1). It controls existence, access control, and per-database encryption. DB 0 is always encrypted with the `--admin-secret`.
When creating a new database, DB 0 allocates an ID, registers it, and optionally stores a per-database encryption key (write-only). Databases are public by default; use RPC to set them private, requiring access keys for SELECT (read or readwrite based on permissions). Keys are persisted in DB 0 for managed AGE operations.
Access DB 0 with `SELECT 0 KEY <admin-secret>`.
## Symmetric Encryption
HeroDB supports stateless symmetric encryption via SYM commands, using XChaCha20-Poly1305 AEAD.
Commands:
- `SYM KEYGEN`: Generate 32-byte key. Returns base64-encoded key.
- `SYM ENCRYPT <key_b64> <message>`: Encrypt message. Returns base64 ciphertext.
- `SYM DECRYPT <key_b64> <ciphertext_b64>`: Decrypt. Returns plaintext.
Example:
```bash
redis-cli SYM KEYGEN
# → base64_key
redis-cli SYM ENCRYPT base64_key "secret"
# → base64_ciphertext
redis-cli SYM DECRYPT base64_key base64_ciphertext
# → "secret"
```
## RPC Options
Enable the JSON-RPC server with `--enable-rpc` for database management. Methods include creating databases, managing access keys, and setting encryption. See [JSON-RPC Examples](./rpc_examples.md) for payloads.
# HeroDB Commands
HeroDB implements a subset of Redis commands over the Redis protocol. This document describes the available commands and their usage.
## String Commands
### PING
Ping the server to test connectivity.
```bash
redis-cli -p $PORT PING
# → PONG
```
### ECHO
Echo the given message.
```bash
redis-cli -p $PORT ECHO "hello"
# → hello
```
### SET
Set a key to hold a string value.
```bash
redis-cli -p $PORT SET key value
# → OK
```
Options:
- EX seconds: Set expiration in seconds
- PX milliseconds: Set expiration in milliseconds
- NX: Only set if key doesn't exist
- XX: Only set if key exists
- GET: Return old value
Examples:
```bash
redis-cli -p $PORT SET key value EX 60
redis-cli -p $PORT SET key value PX 1000
redis-cli -p $PORT SET key value NX
redis-cli -p $PORT SET key value XX
redis-cli -p $PORT SET key value GET
```
### GET
Get the value of a key.
```bash
redis-cli -p $PORT GET key
# → value
```
### MGET
Get values of multiple keys.
```bash
redis-cli -p $PORT MGET key1 key2 key3
# → 1) "value1"
# 2) "value2"
# 3) (nil)
```
### MSET
Set multiple key-value pairs.
```bash
redis-cli -p $PORT MSET key1 value1 key2 value2
# → OK
```
### INCR
Increment the integer value of a key by 1.
```bash
redis-cli -p $PORT SET counter 10
redis-cli -p $PORT INCR counter
# → 11
```
### DEL
Delete a key.
```bash
redis-cli -p $PORT DEL key
# → 1
```
For multiple keys:
```bash
redis-cli -p $PORT DEL key1 key2 key3
# → number of keys deleted
```
### TYPE
Determine the type of a key.
```bash
redis-cli -p $PORT TYPE key
# → string
```
### EXISTS
Check if a key exists.
```bash
redis-cli -p $PORT EXISTS key
# → 1 (exists) or 0 (doesn't exist)
```
For multiple keys:
```bash
redis-cli -p $PORT EXISTS key1 key2 key3
# → count of existing keys
```
### EXPIRE / PEXPIRE
Set expiration time for a key.
```bash
redis-cli -p $PORT EXPIRE key 60
# → 1 (timeout set) or 0 (timeout not set)
redis-cli -p $PORT PEXPIRE key 1000
# → 1 (timeout set) or 0 (timeout not set)
```
### EXPIREAT / PEXPIREAT
Set expiration timestamp for a key.
```bash
redis-cli -p $PORT EXPIREAT key 1672531200
# → 1 (timeout set) or 0 (timeout not set)
redis-cli -p $PORT PEXPIREAT key 1672531200000
# → 1 (timeout set) or 0 (timeout not set)
```
### TTL
Get the time to live for a key.
```bash
redis-cli -p $PORT TTL key
# → remaining time in seconds
```
### PERSIST
Remove expiration from a key.
```bash
redis-cli -p $PORT PERSIST key
# → 1 (timeout removed) or 0 (key has no timeout)
```
## Hash Commands
### HSET
Set field-value pairs in a hash.
```bash
redis-cli -p $PORT HSET hashkey field1 value1 field2 value2
# → number of fields added
```
### HGET
Get value of a field in a hash.
```bash
redis-cli -p $PORT HGET hashkey field1
# → value1
```
### HGETALL
Get all field-value pairs in a hash.
```bash
redis-cli -p $PORT HGETALL hashkey
# → 1) "field1"
# 2) "value1"
# 3) "field2"
# 4) "value2"
```
### HDEL
Delete fields from a hash.
```bash
redis-cli -p $PORT HDEL hashkey field1 field2
# → number of fields deleted
```
### HEXISTS
Check if a field exists in a hash.
```bash
redis-cli -p $PORT HEXISTS hashkey field1
# → 1 (exists) or 0 (doesn't exist)
```
### HKEYS
Get all field names in a hash.
```bash
redis-cli -p $PORT HKEYS hashkey
# → 1) "field1"
# 2) "field2"
```
### HVALS
Get all values in a hash.
```bash
redis-cli -p $PORT HVALS hashkey
# → 1) "value1"
# 2) "value2"
```
### HLEN
Get number of fields in a hash.
```bash
redis-cli -p $PORT HLEN hashkey
# → number of fields
```
### HMGET
Get values of multiple fields in a hash.
```bash
redis-cli -p $PORT HMGET hashkey field1 field2 field3
# → 1) "value1"
# 2) "value2"
# 3) (nil)
```
### HSETNX
Set field-value pair in hash only if field doesn't exist.
```bash
redis-cli -p $PORT HSETNX hashkey field1 value1
# → 1 (field set) or 0 (field not set)
```
### HINCRBY
Increment integer value of a field in a hash.
```bash
redis-cli -p $PORT HINCRBY hashkey field1 5
# → new value
```
### HINCRBYFLOAT
Increment float value of a field in a hash.
```bash
redis-cli -p $PORT HINCRBYFLOAT hashkey field1 3.14
# → new value
```
### HSCAN
Incrementally iterate over fields in a hash.
```bash
redis-cli -p $PORT HSCAN hashkey 0
# → 1) "next_cursor"
# 2) 1) "field1"
# 2) "value1"
# 3) "field2"
# 4) "value2"
```
Options:
- MATCH pattern: Filter fields by pattern
- COUNT number: Suggest number of fields to return
Examples:
```bash
redis-cli -p $PORT HSCAN hashkey 0 MATCH f*
redis-cli -p $PORT HSCAN hashkey 0 COUNT 10
redis-cli -p $PORT HSCAN hashkey 0 MATCH f* COUNT 10
```
## List Commands
### LPUSH
Insert elements at the head of a list.
```bash
redis-cli -p $PORT LPUSH listkey element1 element2 element3
# → number of elements in the list
```
### RPUSH
Insert elements at the tail of a list.
```bash
redis-cli -p $PORT RPUSH listkey element1 element2 element3
# → number of elements in the list
```
### LPOP
Remove and return elements from the head of a list.
```bash
redis-cli -p $PORT LPOP listkey
# → element1
```
With count:
```bash
redis-cli -p $PORT LPOP listkey 2
# → 1) "element1"
# 2) "element2"
```
### RPOP
Remove and return elements from the tail of a list.
```bash
redis-cli -p $PORT RPOP listkey
# → element3
```
With count:
```bash
redis-cli -p $PORT RPOP listkey 2
# → 1) "element3"
# 2) "element2"
```
### LLEN
Get the length of a list.
```bash
redis-cli -p $PORT LLEN listkey
# → number of elements in the list
```
### LINDEX
Get element at index in a list.
```bash
redis-cli -p $PORT LINDEX listkey 0
# → first element
```
Negative indices count from the end:
```bash
redis-cli -p $PORT LINDEX listkey -1
# → last element
```
### LRANGE
Get a range of elements from a list.
```bash
redis-cli -p $PORT LRANGE listkey 0 -1
# → 1) "element1"
# 2) "element2"
# 3) "element3"
```
### LTRIM
Trim a list to specified range.
```bash
redis-cli -p $PORT LTRIM listkey 0 1
# → OK (list now contains only first 2 elements)
```
### LREM
Remove elements from a list.
```bash
redis-cli -p $PORT LREM listkey 2 element1
# → number of elements removed
```
Count values:
- Positive: Remove from head
- Negative: Remove from tail
- Zero: Remove all
### LINSERT
Insert element before or after pivot element.
```bash
redis-cli -p $PORT LINSERT listkey BEFORE pivot newelement
# → number of elements in the list
```
### BLPOP
Blocking remove and return elements from the head of a list.
```bash
redis-cli -p $PORT BLPOP listkey1 listkey2 5
# → 1) "listkey1"
# 2) "element1"
```
If no elements are available, blocks for specified timeout (in seconds) until an element is pushed to one of the lists.
### BRPOP
Blocking remove and return elements from the tail of a list.
```bash
redis-cli -p $PORT BRPOP listkey1 listkey2 5
# → 1) "listkey1"
# 2) "element1"
```
If no elements are available, blocks for specified timeout (in seconds) until an element is pushed to one of the lists.
## Keyspace Commands
### KEYS
Get all keys matching pattern.
```bash
redis-cli -p $PORT KEYS *
# → 1) "key1"
# 2) "key2"
```
### SCAN
Incrementally iterate over keys.
```bash
redis-cli -p $PORT SCAN 0
# → 1) "next_cursor"
# 2) 1) "key1"
# 2) "key2"
```
Options:
- MATCH pattern: Filter keys by pattern
- COUNT number: Suggest number of keys to return
Examples:
```bash
redis-cli -p $PORT SCAN 0 MATCH k*
redis-cli -p $PORT SCAN 0 COUNT 10
redis-cli -p $PORT SCAN 0 MATCH k* COUNT 10
```
### DBSIZE
Get number of keys in current database.
```bash
redis-cli -p $PORT DBSIZE
# → number of keys
```
### FLUSHDB
Remove all keys from current database.
```bash
redis-cli -p $PORT FLUSHDB
# → OK
```
## Configuration Commands
### CONFIG GET
Get configuration parameter.
```bash
redis-cli -p $PORT CONFIG GET dir
# → 1) "dir"
# 2) "/path/to/db"
redis-cli -p $PORT CONFIG GET dbfilename
# → 1) "dbfilename"
# 2) "0.db"
```
## Client Commands
### CLIENT SETNAME
Set current connection name.
```bash
redis-cli -p $PORT CLIENT SETNAME myconnection
# → OK
```
### CLIENT GETNAME
Get current connection name.
```bash
redis-cli -p $PORT CLIENT GETNAME
# → myconnection
```
## Transaction Commands
### MULTI
Start a transaction block.
```bash
redis-cli -p $PORT MULTI
# → OK
```
### EXEC
Execute all commands in transaction block.
```bash
redis-cli -p $PORT MULTI
redis-cli -p $PORT SET key1 value1
redis-cli -p $PORT SET key2 value2
redis-cli -p $PORT EXEC
# → 1) OK
# 2) OK
```
### DISCARD
Discard all commands in transaction block.
```bash
redis-cli -p $PORT MULTI
redis-cli -p $PORT SET key1 value1
redis-cli -p $PORT DISCARD
# → OK
```
## AGE Commands
### AGE GENENC
Generate ephemeral encryption keypair.
```bash
redis-cli -p $PORT AGE GENENC
# → 1) "recipient_public_key"
# 2) "identity_secret_key"
```
### AGE ENCRYPT
Encrypt message with recipient public key.
```bash
redis-cli -p $PORT AGE ENCRYPT recipient_public_key "message"
# → base64_encoded_ciphertext
```
### AGE DECRYPT
Decrypt ciphertext with identity secret key.
```bash
redis-cli -p $PORT AGE DECRYPT identity_secret_key base64_encoded_ciphertext
# → decrypted_message
```
### AGE GENSIGN
Generate ephemeral signing keypair.
```bash
redis-cli -p $PORT AGE GENSIGN
# → 1) "verify_public_key"
# 2) "sign_secret_key"
```
### AGE SIGN
Sign message with signing secret key.
```bash
redis-cli -p $PORT AGE SIGN sign_secret_key "message"
# → base64_encoded_signature
```
### AGE VERIFY
Verify signature with verify public key.
```bash
redis-cli -p $PORT AGE VERIFY verify_public_key "message" base64_encoded_signature
# → 1 (valid) or 0 (invalid)
```
### AGE KEYGEN
Generate and persist named encryption keypair.
```bash
redis-cli -p $PORT AGE KEYGEN keyname
# → 1) "recipient_public_key"
# 2) "identity_secret_key"
```
### AGE SIGNKEYGEN
Generate and persist named signing keypair.
```bash
redis-cli -p $PORT AGE SIGNKEYGEN keyname
# → 1) "verify_public_key"
# 2) "sign_secret_key"
```
### AGE ENCRYPTNAME
Encrypt message with named key.
```bash
redis-cli -p $PORT AGE ENCRYPTNAME keyname "message"
# → base64_encoded_ciphertext
```
### AGE DECRYPTNAME
Decrypt ciphertext with named key.
```bash
redis-cli -p $PORT AGE DECRYPTNAME keyname base64_encoded_ciphertext
# → decrypted_message
```
### AGE SIGNNAME
Sign message with named signing key.
```bash
redis-cli -p $PORT AGE SIGNNAME keyname "message"
# → base64_encoded_signature
```
### AGE VERIFYNAME
Verify signature with named verify key.
```bash
redis-cli -p $PORT AGE VERIFYNAME keyname "message" base64_encoded_signature
# → 1 (valid) or 0 (invalid)
```
### AGE LIST
List all stored AGE keys.
```bash
redis-cli -p $PORT AGE LIST
# → 1) "keyname1"
# 2) "keyname2"
```
## SYM Commands
### SYM KEYGEN
Generate a symmetric encryption key.
```bash
redis-cli -p $PORT SYM KEYGEN
# → base64_encoded_32byte_key
```
### SYM ENCRYPT
Encrypt a message with a symmetric key.
```bash
redis-cli -p $PORT SYM ENCRYPT <key_b64> "message"
# → base64_encoded_ciphertext
```
### SYM DECRYPT
Decrypt a ciphertext with a symmetric key.
```bash
redis-cli -p $PORT SYM DECRYPT <key_b64> <ciphertext_b64>
# → decrypted_message
```
## Server Information Commands
### INFO
Get server information.
```bash
redis-cli -p $PORT INFO
# → Server information
```
With section:
```bash
redis-cli -p $PORT INFO replication
# → Replication information
```
### COMMAND
Get command information (stub implementation).
```bash
redis-cli -p $PORT COMMAND
# → Empty array (stub)
```
## Database Selection
### SELECT
Select database by index.
```bash
redis-cli -p $PORT SELECT 0
# → OK
```
```
This expanded documentation includes all the list commands that were implemented in the cmd.rs file:
1. LPUSH - push elements to the left (head) of a list
2. RPUSH - push elements to the right (tail) of a list
3. LPOP - pop elements from the left (head) of a list
4. RPOP - pop elements from the right (tail) of a list
5. BLPOP - blocking pop from the left with timeout
6. BRPOP - blocking pop from the right with timeout
7. LLEN - get list length
8. LREM - remove elements from list
9. LTRIM - trim list to range
10. LINDEX - get element by index
11. LRANGE - get range of elements
## Updated Database Selection and Access Keys
HeroDB uses an `Admin DB 0` to control database existence, access, and encryption. Access to data DBs can be public (no key) or private (requires a key). See detailed model in `docs/admin.md`.
Examples:
```bash
# Public database (no key required)
redis-cli -p $PORT SELECT 1
# → OK
```
```bash
# Private database (requires access key)
redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
# → OK
```
```bash
# Admin DB 0 (requires admin secret)
redis-cli -p $PORT SELECT 0 KEY my-admin-secret
# → OK
```

148
docs/cmds.md Normal file
View File

@@ -0,0 +1,148 @@
## Backend Support
HeroDB supports two storage backends, both with full encryption support:
- **redb** (default): Full-featured, optimized for production use
- **sled**: Alternative embedded database with encryption support
### Starting HeroDB with Different Backends
```bash
# Use default redb backend
./target/release/herodb --dir /tmp/herodb_redb --port 6379
# Use sled backend
./target/release/herodb --dir /tmp/herodb_sled --port 6379 --sled
# Use redb with encryption
./target/release/herodb --dir /tmp/herodb_encrypted --port 6379 --encrypt --key mysecretkey
# Use sled with encryption
./target/release/herodb --dir /tmp/herodb_sled_encrypted --port 6379 --sled --encrypt --key mysecretkey
```
### Command Support by Backend
Command Category | redb | sled | Notes |
|-----------------|------|------|-------|
**Strings** | | | |
SET | ✅ | ✅ | Full support |
GET | ✅ | ✅ | Full support |
DEL | ✅ | ✅ | Full support |
EXISTS | ✅ | ✅ | Full support |
INCR/DECR | ✅ | ✅ | Full support |
MGET/MSET | ✅ | ✅ | Full support |
**Hashes** | | | |
HSET | ✅ | ✅ | Full support |
HGET | ✅ | ✅ | Full support |
HGETALL | ✅ | ✅ | Full support |
HDEL | ✅ | ✅ | Full support |
HEXISTS | ✅ | ✅ | Full support |
HKEYS | ✅ | ✅ | Full support |
HVALS | ✅ | ✅ | Full support |
HLEN | ✅ | ✅ | Full support |
HMGET | ✅ | ✅ | Full support |
HSETNX | ✅ | ✅ | Full support |
HINCRBY/HINCRBYFLOAT | ✅ | ✅ | Full support |
HSCAN | ✅ | ✅ | Full support with pattern matching |
**Lists** | | | |
LPUSH/RPUSH | ✅ | ✅ | Full support |
LPOP/RPOP | ✅ | ✅ | Full support |
LLEN | ✅ | ✅ | Full support |
LRANGE | ✅ | ✅ | Full support |
LINDEX | ✅ | ✅ | Full support |
LTRIM | ✅ | ✅ | Full support |
LREM | ✅ | ✅ | Full support |
BLPOP/BRPOP | ✅ | ❌ | Blocking operations not in sled |
**Expiration** | | | |
EXPIRE | ✅ | ✅ | Full support in both |
TTL | ✅ | ✅ | Full support in both |
PERSIST | ✅ | ✅ | Full support in both |
SETEX/PSETEX | ✅ | ✅ | Full support in both |
EXPIREAT/PEXPIREAT | ✅ | ✅ | Full support in both |
**Scanning** | | | |
KEYS | ✅ | ✅ | Full support with patterns |
SCAN | ✅ | ✅ | Full cursor-based iteration |
HSCAN | ✅ | ✅ | Full cursor-based iteration |
**Transactions** | | | |
MULTI/EXEC/DISCARD | ✅ | ❌ | Only supported in redb |
**Encryption** | | | |
Data-at-rest encryption | ✅ | ✅ | Both support [age](age.tech) encryption |
AGE commands | ✅ | ✅ | Both support AGE crypto commands |
**Full-Text Search** | | | |
FT.CREATE | ✅ | ✅ | Create search index with schema |
FT.ADD | ✅ | ✅ | Add document to search index |
FT.SEARCH | ✅ | ✅ | Search documents with query |
FT.DEL | ✅ | ✅ | Delete document from index |
FT.INFO | ✅ | ✅ | Get index information |
FT.DROP | ✅ | ✅ | Drop search index |
FT.ALTER | ✅ | ✅ | Alter index schema |
FT.AGGREGATE | ✅ | ✅ | Aggregate search results |
### Performance Considerations
- **redb**: Optimized for concurrent access, better for high-throughput scenarios
- **sled**: Lock-free architecture, excellent for specific workloads
### Encryption Features
Both backends support:
- Transparent data-at-rest encryption using the `age` encryption library
- Per-database encryption (databases >= 10 are encrypted when `--encrypt` flag is used)
- Secure key derivation using the master key
### Backend Selection Examples
```bash
# Example: Testing both backends
redis-cli -p 6379 SET mykey "redb value"
redis-cli -p 6381 SET mykey "sled value"
# Example: Using encryption with both
./target/release/herodb --port 6379 --encrypt --key secret123
./target/release/herodb --port 6381 --sled --encrypt --key secret123
# Both support the same Redis commands
redis-cli -p 6379 HSET user:1 name "Alice" age "30"
redis-cli -p 6381 HSET user:1 name "Alice" age "30"
# Both support SCAN operations
redis-cli -p 6379 SCAN 0 MATCH user:* COUNT 10
redis-cli -p 6381 SCAN 0 MATCH user:* COUNT 10
```
### Migration Between Backends
To migrate data between backends, use Redis replication or dump/restore:
```bash
# Export from redb
redis-cli -p 6379 --rdb dump.rdb
# Import to sled
redis-cli -p 6381 --pipe < dump.rdb
```
## Authentication and Database Selection
HeroDB uses an `Admin DB 0` to govern database existence, access and per-db encryption. Access control is enforced via `Admin DB 0` metadata. See the full model in `docs/admin.md`.
Examples:
```bash
# Public database (no key required)
redis-cli -p $PORT SELECT 1
# → OK
```
```bash
# Private database (requires access key)
redis-cli -p $PORT SELECT 2 KEY my-db2-access-key
# → OK
```
```bash
# Admin DB 0 (requires admin secret)
redis-cli -p $PORT SELECT 0 KEY my-admin-secret
# → OK
```

141
docs/rpc_examples.md Normal file
View File

@@ -0,0 +1,141 @@
# HeroDB JSON-RPC Examples
These examples show full JSON-RPC 2.0 payloads for managing HeroDB via the RPC API (enable with `--enable-rpc`). Methods are named as `hero_<function>`. Params are positional arrays; enum values are strings (e.g., `"Redb"`). Copy-paste into Postman or similar clients.
## Database Management
### Create Database
Creates a new database with optional per-database encryption key (stored write-only in Admin DB 0).
```json
{
"jsonrpc": "2.0",
"id": 1,
"method": "hero_createDatabase",
"params": [
"Redb",
{ "name": null, "storage_path": null, "max_size": null, "redis_version": null },
null
]
}
```
With encryption:
```json
{
"jsonrpc": "2.0",
"id": 2,
"method": "hero_createDatabase",
"params": [
"Sled",
{ "name": "secure-db", "storage_path": null, "max_size": null, "redis_version": null },
"my-per-db-encryption-key"
]
}
```
### List Databases
Returns array of database infos (id, backend, encrypted status, size, etc.).
```json
{
"jsonrpc": "2.0",
"id": 3,
"method": "hero_listDatabases",
"params": []
}
```
### Get Database Info
Retrieves detailed info for a specific database.
```json
{
"jsonrpc": "2.0",
"id": 4,
"method": "hero_getDatabaseInfo",
"params": [1]
}
```
### Delete Database
Removes physical database file; metadata remains in Admin DB 0.
```json
{
"jsonrpc": "2.0",
"id": 5,
"method": "hero_deleteDatabase",
"params": [1]
}
```
## Access Control
### Add Access Key
Adds a hashed access key for private databases. Permissions: `"read"` or `"readwrite"`.
```json
{
"jsonrpc": "2.0",
"id": 6,
"method": "hero_addAccessKey",
"params": [2, "my-access-key", "readwrite"]
}
```
### List Access Keys
Returns array of key hashes, permissions, and creation timestamps.
```json
{
"jsonrpc": "2.0",
"id": 7,
"method": "hero_listAccessKeys",
"params": [2]
}
```
### Delete Access Key
Removes key by its SHA-256 hash.
```json
{
"jsonrpc": "2.0",
"id": 8,
"method": "hero_deleteAccessKey",
"params": [2, "0123abcd...keyhash..."]
}
```
### Set Database Public/Private
Toggles public access (default true). Private databases require access keys.
```json
{
"jsonrpc": "2.0",
"id": 9,
"method": "hero_setDatabasePublic",
"params": [2, false]
}
```
## Server Info
### Get Server Stats
Returns stats like total databases and uptime.
```json
{
"jsonrpc": "2.0",
"id": 10,
"method": "hero_getServerStats",
"params": []
}
```
## Notes
- Per-database encryption keys are write-only; set at creation and used transparently.
- Access keys are hashed (SHA-256) for storage; provide plaintext in requests.
- Backend options: `"Redb"` (default) or `"Sled"`.
- Config object fields (name, storage_path, etc.) are optional and currently ignored but positional.

397
docs/search.md Normal file
View File

@@ -0,0 +1,397 @@
# Full-Text Search with Tantivy
HeroDB includes powerful full-text search capabilities powered by [Tantivy](https://github.com/quickwit-oss/tantivy), a fast full-text search engine library written in Rust. This provides Redis-compatible search commands similar to RediSearch.
## Overview
The search functionality allows you to:
- Create search indexes with custom schemas
- Index documents with multiple field types
- Perform complex queries with filters
- Support for text, numeric, date, and geographic data
- Real-time search with high performance
## Search Commands
### FT.CREATE - Create Search Index
Create a new search index with a defined schema.
```bash
FT.CREATE index_name SCHEMA field_name field_type [options] [field_name field_type [options] ...]
```
**Field Types:**
- `TEXT` - Full-text searchable text fields
- `NUMERIC` - Numeric fields (integers, floats)
- `TAG` - Tag fields for exact matching
- `GEO` - Geographic coordinates (lat,lon)
- `DATE` - Date/timestamp fields
**Field Options:**
- `STORED` - Store field value for retrieval
- `INDEXED` - Make field searchable
- `TOKENIZED` - Enable tokenization for text fields
- `FAST` - Enable fast access for numeric fields
**Example:**
```bash
# Create a product search index
FT.CREATE products SCHEMA
title TEXT STORED INDEXED TOKENIZED
description TEXT STORED INDEXED TOKENIZED
price NUMERIC STORED INDEXED FAST
category TAG STORED
location GEO STORED
created_date DATE STORED INDEXED
```
### FT.ADD - Add Document to Index
Add a document to a search index.
```bash
FT.ADD index_name doc_id [SCORE score] FIELDS field_name field_value [field_name field_value ...]
```
**Example:**
```bash
# Add a product document
FT.ADD products product:1 SCORE 1.0 FIELDS
title "Wireless Headphones"
description "High-quality wireless headphones with noise cancellation"
price 199.99
category "electronics"
location "37.7749,-122.4194"
created_date 1640995200000
```
### FT.SEARCH - Search Documents
Search for documents in an index.
```bash
FT.SEARCH index_name query [LIMIT offset count] [FILTER field min max] [RETURN field [field ...]]
```
**Query Syntax:**
- Simple terms: `wireless headphones`
- Phrase queries: `"noise cancellation"`
- Field-specific: `title:wireless`
- Boolean operators: `wireless AND headphones`
- Wildcards: `head*`
**Examples:**
```bash
# Simple text search
FT.SEARCH products "wireless headphones"
# Search with filters
FT.SEARCH products "headphones" FILTER price 100 300 LIMIT 0 10
# Field-specific search
FT.SEARCH products "title:wireless AND category:electronics"
# Return specific fields only
FT.SEARCH products "*" RETURN title price
```
### FT.DEL - Delete Document
Remove a document from the search index.
```bash
FT.DEL index_name doc_id
```
**Example:**
```bash
FT.DEL products product:1
```
### FT.INFO - Get Index Information
Get information about a search index.
```bash
FT.INFO index_name
```
**Returns:**
- Index name and document count
- Field definitions and types
- Index configuration
**Example:**
```bash
FT.INFO products
```
### FT.DROP - Drop Index
Delete an entire search index.
```bash
FT.DROP index_name
```
**Example:**
```bash
FT.DROP products
```
### FT.ALTER - Alter Index Schema
Add new fields to an existing index.
```bash
FT.ALTER index_name SCHEMA ADD field_name field_type [options]
```
**Example:**
```bash
FT.ALTER products SCHEMA ADD brand TAG STORED
```
### FT.AGGREGATE - Aggregate Search Results
Perform aggregations on search results.
```bash
FT.AGGREGATE index_name query [GROUPBY field] [REDUCE function field AS alias]
```
**Example:**
```bash
# Group products by category and count
FT.AGGREGATE products "*" GROUPBY category REDUCE COUNT 0 AS count
```
## Field Types in Detail
### TEXT Fields
- **Purpose**: Full-text search on natural language content
- **Features**: Tokenization, stemming, stop-word removal
- **Options**: `STORED`, `INDEXED`, `TOKENIZED`
- **Example**: Product titles, descriptions, content
### NUMERIC Fields
- **Purpose**: Numeric data for range queries and sorting
- **Types**: I64, U64, F64
- **Options**: `STORED`, `INDEXED`, `FAST`
- **Example**: Prices, quantities, ratings
### TAG Fields
- **Purpose**: Exact-match categorical data
- **Features**: No tokenization, exact string matching
- **Options**: `STORED`, case sensitivity control
- **Example**: Categories, brands, status values
### GEO Fields
- **Purpose**: Geographic coordinates
- **Format**: "latitude,longitude" (e.g., "37.7749,-122.4194")
- **Features**: Geographic distance queries
- **Options**: `STORED`
### DATE Fields
- **Purpose**: Timestamp and date data
- **Format**: Unix timestamp in milliseconds
- **Features**: Range queries, temporal filtering
- **Options**: `STORED`, `INDEXED`, `FAST`
## Search Query Syntax
### Basic Queries
```bash
# Single term
FT.SEARCH products "wireless"
# Multiple terms (AND by default)
FT.SEARCH products "wireless headphones"
# Phrase query
FT.SEARCH products "\"noise cancellation\""
```
### Field-Specific Queries
```bash
# Search in specific field
FT.SEARCH products "title:wireless"
# Multiple field queries
FT.SEARCH products "title:wireless AND description:bluetooth"
```
### Boolean Operators
```bash
# AND operator
FT.SEARCH products "wireless AND headphones"
# OR operator
FT.SEARCH products "wireless OR bluetooth"
# NOT operator
FT.SEARCH products "headphones NOT wired"
```
### Wildcards and Fuzzy Search
```bash
# Wildcard search
FT.SEARCH products "head*"
# Fuzzy search (approximate matching)
FT.SEARCH products "%headphone%"
```
### Range Queries
```bash
# Numeric range in query
FT.SEARCH products "@price:[100 300]"
# Date range
FT.SEARCH products "@created_date:[1640995200000 1672531200000]"
```
## Filtering and Sorting
### FILTER Clause
```bash
# Numeric filter
FT.SEARCH products "headphones" FILTER price 100 300
# Multiple filters
FT.SEARCH products "*" FILTER price 100 500 FILTER rating 4 5
```
### LIMIT Clause
```bash
# Pagination
FT.SEARCH products "wireless" LIMIT 0 10 # First 10 results
FT.SEARCH products "wireless" LIMIT 10 10 # Next 10 results
```
### RETURN Clause
```bash
# Return specific fields
FT.SEARCH products "*" RETURN title price
# Return all stored fields (default)
FT.SEARCH products "*"
```
## Performance Considerations
### Indexing Strategy
- Only index fields you need to search on
- Use `FAST` option for frequently filtered numeric fields
- Consider storage vs. search performance trade-offs
### Query Optimization
- Use specific field queries when possible
- Combine filters with text queries for better performance
- Use pagination with LIMIT for large result sets
### Memory Usage
- Tantivy indexes are memory-mapped for performance
- Index size depends on document count and field configuration
- Monitor disk space for index storage
## Integration with Redis Commands
Search indexes work alongside regular Redis data:
```bash
# Store product data in Redis hash
HSET product:1 title "Wireless Headphones" price "199.99"
# Index the same data for search
FT.ADD products product:1 FIELDS title "Wireless Headphones" price 199.99
# Search returns document IDs that can be used with Redis commands
FT.SEARCH products "wireless"
# Returns: product:1
# Retrieve full data using Redis
HGETALL product:1
```
## Example Use Cases
### E-commerce Product Search
```bash
# Create product catalog index
FT.CREATE catalog SCHEMA
name TEXT STORED INDEXED TOKENIZED
description TEXT INDEXED TOKENIZED
price NUMERIC STORED INDEXED FAST
category TAG STORED
brand TAG STORED
rating NUMERIC STORED FAST
# Add products
FT.ADD catalog prod:1 FIELDS name "iPhone 14" price 999 category "phones" brand "apple" rating 4.5
FT.ADD catalog prod:2 FIELDS name "Samsung Galaxy" price 899 category "phones" brand "samsung" rating 4.3
# Search queries
FT.SEARCH catalog "iPhone"
FT.SEARCH catalog "phones" FILTER price 800 1000
FT.SEARCH catalog "@brand:apple"
```
### Content Management
```bash
# Create content index
FT.CREATE content SCHEMA
title TEXT STORED INDEXED TOKENIZED
body TEXT INDEXED TOKENIZED
author TAG STORED
published DATE STORED INDEXED
tags TAG STORED
# Search content
FT.SEARCH content "machine learning"
FT.SEARCH content "@author:john AND @tags:ai"
FT.SEARCH content "*" FILTER published 1640995200000 1672531200000
```
### Geographic Search
```bash
# Create location-based index
FT.CREATE places SCHEMA
name TEXT STORED INDEXED TOKENIZED
location GEO STORED
type TAG STORED
# Add locations
FT.ADD places place:1 FIELDS name "Golden Gate Bridge" location "37.8199,-122.4783" type "landmark"
# Geographic queries (future feature)
FT.SEARCH places "@location:[37.7749 -122.4194 10 km]"
```
## Error Handling
Common error responses:
- `ERR index not found` - Index doesn't exist
- `ERR field not found` - Field not defined in schema
- `ERR invalid query syntax` - Malformed query
- `ERR document not found` - Document ID doesn't exist
## Best Practices
1. **Schema Design**: Plan your schema carefully - changes require reindexing
2. **Field Selection**: Only store and index fields you actually need
3. **Batch Operations**: Add multiple documents efficiently
4. **Query Testing**: Test queries for performance with realistic data
5. **Monitoring**: Monitor index size and query performance
6. **Backup**: Include search indexes in backup strategies
## Future Enhancements
Planned features:
- Geographic distance queries
- Advanced aggregations and faceting
- Highlighting of search results
- Synonyms and custom analyzers
- Real-time suggestions and autocomplete
- Index replication and sharding

253
docs/tantivy.md Normal file
View File

@@ -0,0 +1,253 @@
# Tantivy FullText Backend (JSONRPC)
This document explains how to use HeroDBs Tantivy-backed fulltext search as a dedicated database backend and provides copypasteable JSONRPC requests. Tantivy is available only for nonadmin databases (db_id >= 1). Admin DB 0 always uses Redb/Sled and rejects FT operations.
Important characteristics:
- Tantivy is a third backend alongside Redb and Sled. It provides search indexes only; there is no KV store backing it.
- On Tantivy databases, Redis KV/list/hash commands are rejected; only FT commands and basic control (SELECT, CLIENT, INFO, etc.) are allowed.
- FT JSONRPC is namespaced as "herodb" and methods are named with underscore: herodb_ftCreate, herodb_ftAdd, herodb_ftSearch, herodb_ftDel, herodb_ftInfo, herodb_ftDrop.
Reference to server implementation:
- RPC methods are defined in [rust.trait Rpc()](src/rpc.rs:70):
- [rust.fn ft_create()](src/rpc.rs:121)
- [rust.fn ft_add()](src/rpc.rs:130)
- [rust.fn ft_search()](src/rpc.rs:141)
- [rust.fn ft_del()](src/rpc.rs:154)
- [rust.fn ft_info()](src/rpc.rs:158)
- [rust.fn ft_drop()](src/rpc.rs:162)
Notes on responses:
- ftCreate/ftAdd/ftDel/ftDrop return a JSON boolean: true on success.
- ftSearch/ftInfo return a JSON object with a single key "resp" containing a RESPencoded string (wire format used by Redis). You can display or parse it on the client side as needed.
RESP usage (redis-cli):
- For RESP clients, you must SELECT the Tantivy database first. SELECT now succeeds for Tantivy DBs without opening KV storage.
- After SELECT, you can run FT.* commands within that DB context.
Example with redis-cli:
```bash
# Connect to server
redis-cli -p 6379
# Select Tantivy DB 1 (public by default)
SELECT 1
# → OK
# Create index
FT.CREATE product_catalog SCHEMA title TEXT description TEXT category TAG price NUMERIC rating NUMERIC location GEO
# → OK
# Add a document
FT.ADD product_catalog product:1 1.0 title "Wireless Bluetooth Headphones" description "Premium noise-canceling headphones with 30-hour battery life" category "electronics,audio" price 299.99 rating 4.5 location "-122.4194,37.7749"
# → OK
# Search
FT.SEARCH product_catalog wireless LIMIT 0 3
# → RESP array with hits
```
Storage layout (on disk):
- Indices are stored per database under:
- <base_dir>/search_indexes/<db_id>/<index_name>
- Example: /tmp/test/search_indexes/1/product_catalog
0) Create a new Tantivy database
Use herodb_createDatabase with backend "Tantivy". DB 0 cannot be Tantivy.
```json
{
"jsonrpc": "2.0",
"id": 1,
"method": "herodb_createDatabase",
"params": [
"Tantivy",
{ "name": "search-db", "storage_path": null, "max_size": null, "redis_version": null },
null
]
}
```
The response contains the allocated db_id (>= 1). Use that id in the calls below.
1) FT.CREATE — create an index with schema
Method: herodb_ftCreate → [rust.fn ft_create()](src/rpc.rs:121)
Schema format is an array of tuples: [ [field_name, field_type, [options...] ], ... ]
Supported field types: "TEXT", "NUMERIC" (defaults to F64), "TAG", "GEO"
Supported options (subset): "WEIGHT", "SORTABLE", "NOINDEX", "SEPARATOR", "CASESENSITIVE"
```json
{
"jsonrpc": "2.0",
"id": 2,
"method": "herodb_ftCreate",
"params": [
1,
"product_catalog",
[
["title", "TEXT", ["SORTABLE"]],
["description", "TEXT", []],
["category", "TAG", ["SEPARATOR", ","]],
["price", "NUMERIC", ["SORTABLE"]],
["rating", "NUMERIC", []],
["location", "GEO", []]
]
]
}
```
Returns: true on success.
2) FT.ADD — add or replace a document
Method: herodb_ftAdd → [rust.fn ft_add()](src/rpc.rs:130)
Fields is an object (map) of field_name → value (all values are sent as strings). GEO expects "lat,lon".
```json
{
"jsonrpc": "2.0",
"id": 3,
"method": "herodb_ftAdd",
"params": [
1,
"product_catalog",
"product:1",
1.0,
{
"title": "Wireless Bluetooth Headphones",
"description": "Premium noise-canceling headphones with 30-hour battery life",
"category": "electronics,audio",
"price": "299.99",
"rating": "4.5",
"location": "-122.4194,37.7749"
}
]
}
```
Returns: true on success.
3) FT.SEARCH — query an index
Method: herodb_ftSearch → [rust.fn ft_search()](src/rpc.rs:141)
Parameters: (db_id, index_name, query, filters?, limit?, offset?, return_fields?)
- filters: array of [field, value] pairs (Equals filter)
- limit/offset: numbers (defaults: limit=10, offset=0)
- return_fields: array of field names to include (optional)
Simple query:
```json
{
"jsonrpc": "2.0",
"id": 4,
"method": "herodb_ftSearch",
"params": [1, "product_catalog", "wireless", null, 10, 0, null]
}
```
Pagination + filters + selected fields:
```json
{
"jsonrpc": "2.0",
"id": 5,
"method": "herodb_ftSearch",
"params": [
1,
"product_catalog",
"mouse",
[["category", "electronics"]],
5,
0,
["title", "price", "rating"]
]
}
```
Response shape:
```json
{
"jsonrpc": "2.0",
"id": 5,
"result": { "resp": "*...RESP encoded array..." }
}
```
4) FT.INFO — index metadata
Method: herodb_ftInfo → [rust.fn ft_info()](src/rpc.rs:158)
```json
{
"jsonrpc": "2.0",
"id": 6,
"method": "herodb_ftInfo",
"params": [1, "product_catalog"]
}
```
Response shape:
```json
{
"jsonrpc": "2.0",
"id": 6,
"result": { "resp": "*...RESP encoded array with fields and counts..." }
}
```
5) FT.DEL — delete by doc id
Method: herodb_ftDel → [rust.fn ft_del()](src/rpc.rs:154)
```json
{
"jsonrpc": "2.0",
"id": 7,
"method": "herodb_ftDel",
"params": [1, "product_catalog", "product:1"]
}
```
Returns: true on success. Note: current implementation logs and returns success; physical delete may be a noop until delete is finalized in the engine.
6) FT.DROP — drop an index
Method: herodb_ftDrop → [rust.fn ft_drop()](src/rpc.rs:162)
```json
{
"jsonrpc": "2.0",
"id": 8,
"method": "herodb_ftDrop",
"params": [1, "product_catalog"]
}
```
Returns: true on success.
Field types and options
- TEXT: stored/indexed/tokenized text. "SORTABLE" marks it fast (stored + fast path in our wrapper).
- NUMERIC: stored/indexed numeric; default precision F64. "SORTABLE" enables fast column.
- TAG: exact matching terms. Options: "SEPARATOR" (default ","), "CASESENSITIVE" (default false).
- GEO: "lat,lon" string; stored as two numeric fields internally.
Backend and permission gating
- FT methods are rejected on DB 0.
- FT methods require the database backend to be Tantivy; otherwise RPC returns an error.
- Writelike FT methods (create/add/del/drop) follow the same permission model as Redis writes on selected databases.
Troubleshooting
- "DB backend is not Tantivy": ensure the database was created with backend "Tantivy".
- "FT not allowed on DB 0": use a nonadmin database id (>= 1).
- Empty search results: confirm that the queried fields are tokenized/indexed (TEXT) and that documents were added successfully.
Related docs
- Commandlevel search overview: [docs/search.md](docs/search.md:1)
- RPC definitions: [src/rpc.rs](src/rpc.rs:1)

171
examples/README.md Normal file
View File

@@ -0,0 +1,171 @@
# HeroDB Tantivy Search Examples
This directory contains examples demonstrating HeroDB's full-text search capabilities powered by Tantivy.
## Tantivy Search Demo (Bash Script)
### Overview
The `tantivy_search_demo.sh` script provides a comprehensive demonstration of HeroDB's search functionality using Redis commands. It showcases various search scenarios including basic text search, filtering, sorting, geographic queries, and more.
### Prerequisites
1. **HeroDB Server**: The server must be running on port 6381
2. **Redis CLI**: The `redis-cli` tool must be installed and available in your PATH
### Running the Demo
#### Step 1: Start HeroDB Server
```bash
# From the project root directory
cargo run -- --port 6381
```
#### Step 2: Run the Demo (in a new terminal)
```bash
# From the project root directory
./examples/tantivy_search_demo.sh
```
### What the Demo Covers
The script demonstrates 15 different search scenarios:
1. **Index Creation** - Creating a search index with various field types
2. **Data Insertion** - Adding sample products to the index
3. **Basic Text Search** - Simple keyword searches
4. **Filtered Search** - Combining text search with category filters
5. **Numeric Range Search** - Finding products within price ranges
6. **Sorting Results** - Ordering results by different fields
7. **Limited Results** - Pagination and result limiting
8. **Complex Queries** - Multi-field searches with sorting
9. **Geographic Search** - Location-based queries
10. **Index Information** - Getting statistics about the search index
11. **Search Comparison** - Tantivy vs simple pattern matching
12. **Fuzzy Search** - Typo tolerance and approximate matching
13. **Phrase Search** - Exact phrase matching
14. **Boolean Queries** - AND, OR, NOT operators
15. **Cleanup** - Removing test data
### Sample Data
The demo uses a product catalog with the following fields:
- **title** (TEXT) - Product name with higher search weight
- **description** (TEXT) - Detailed product description
- **category** (TAG) - Comma-separated categories
- **price** (NUMERIC) - Product price for range queries
- **rating** (NUMERIC) - Customer rating for sorting
- **location** (GEO) - Geographic coordinates for location searches
### Key Redis Commands Demonstrated
#### Index Management
```bash
# Create search index
FT.CREATE product_catalog ON HASH PREFIX 1 product: SCHEMA title TEXT WEIGHT 2.0 SORTABLE description TEXT category TAG SEPARATOR , price NUMERIC SORTABLE rating NUMERIC SORTABLE location GEO
# Get index information
FT.INFO product_catalog
# Drop index
FT.DROPINDEX product_catalog
```
#### Search Queries
```bash
# Basic text search
FT.SEARCH product_catalog wireless
# Filtered search
FT.SEARCH product_catalog 'organic @category:{food}'
# Numeric range
FT.SEARCH product_catalog '@price:[50 150]'
# Sorted results
FT.SEARCH product_catalog '@category:{electronics}' SORTBY price ASC
# Geographic search
FT.SEARCH product_catalog '@location:[37.7749 -122.4194 50 km]'
# Boolean queries
FT.SEARCH product_catalog 'wireless AND audio'
FT.SEARCH product_catalog 'coffee OR tea'
# Phrase search
FT.SEARCH product_catalog '"noise canceling"'
```
### Interactive Features
The demo script includes:
- **Colored output** for better readability
- **Pause between steps** to review results
- **Error handling** with clear error messages
- **Automatic cleanup** of test data
- **Progress indicators** showing what each step demonstrates
### Troubleshooting
#### HeroDB Not Running
```
✗ HeroDB is not running on port 6381
Please start HeroDB with: cargo run -- --port 6381
```
**Solution**: Start the HeroDB server in a separate terminal.
#### Redis CLI Not Found
```
redis-cli: command not found
```
**Solution**: Install Redis tools or use an alternative Redis client.
#### Connection Refused
```
Could not connect to Redis at localhost:6381: Connection refused
```
**Solution**: Ensure HeroDB is running and listening on the correct port.
### Manual Testing
You can also run individual commands manually:
```bash
# Connect to HeroDB
redis-cli -h localhost -p 6381
# Create a simple index
FT.CREATE myindex ON HASH SCHEMA title TEXT description TEXT
# Add a document
HSET doc:1 title "Hello World" description "This is a test document"
# Search
FT.SEARCH myindex hello
```
### Performance Notes
- **Indexing**: Documents are indexed in real-time as they're added
- **Search Speed**: Full-text search is much faster than pattern matching on large datasets
- **Memory Usage**: Tantivy indexes are memory-efficient and disk-backed
- **Scalability**: Supports millions of documents with sub-second search times
### Advanced Features
The demo showcases advanced Tantivy features:
- **Relevance Scoring** - Results ranked by relevance
- **Fuzzy Matching** - Handles typos and approximate matches
- **Field Weighting** - Title field has higher search weight
- **Multi-field Search** - Search across multiple fields simultaneously
- **Geographic Queries** - Distance-based location searches
- **Numeric Ranges** - Efficient range queries on numeric fields
- **Tag Filtering** - Fast categorical filtering
### Next Steps
After running the demo, explore:
1. **Custom Schemas** - Define your own field types and configurations
2. **Large Datasets** - Test with thousands or millions of documents
3. **Real Applications** - Integrate search into your applications
4. **Performance Tuning** - Optimize for your specific use case
For more information, see the [search documentation](../herodb/docs/search.md).

186
examples/simple_demo.sh Normal file
View File

@@ -0,0 +1,186 @@
#!/bin/bash
# Simple HeroDB Demo - Basic Redis Commands
# This script demonstrates basic Redis functionality that's currently implemented
set -e # Exit on any error
# Configuration
REDIS_HOST="localhost"
REDIS_PORT="6381"
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Function to print colored output
print_header() {
echo -e "${BLUE}=== $1 ===${NC}"
}
print_success() {
echo -e "${GREEN}$1${NC}"
}
print_info() {
echo -e "${YELLOW} $1${NC}"
}
print_error() {
echo -e "${RED}$1${NC}"
}
# Function to check if HeroDB is running
check_herodb() {
print_info "Checking if HeroDB is running on port $REDIS_PORT..."
if ! $REDIS_CLI ping > /dev/null 2>&1; then
print_error "HeroDB is not running on port $REDIS_PORT"
print_info "Please start HeroDB with: cargo run -- --port $REDIS_PORT"
exit 1
fi
print_success "HeroDB is running and responding"
}
# Function to execute Redis command with error handling
execute_cmd() {
local cmd="$1"
local description="$2"
echo -e "${YELLOW}Command:${NC} $cmd"
if result=$($REDIS_CLI $cmd 2>&1); then
echo -e "${GREEN}Result:${NC} $result"
return 0
else
print_error "Failed: $description"
echo "Error: $result"
return 1
fi
}
# Main demo function
main() {
clear
print_header "HeroDB Basic Functionality Demo"
echo "This demo shows basic Redis commands that are currently implemented"
echo "HeroDB runs on port $REDIS_PORT (instead of Redis default 6379)"
echo
# Check if HeroDB is running
check_herodb
echo
print_header "Step 1: Basic Key-Value Operations"
execute_cmd "SET greeting 'Hello HeroDB!'" "Setting a simple key-value pair"
echo
execute_cmd "GET greeting" "Getting the value"
echo
execute_cmd "SET counter 42" "Setting a numeric value"
echo
execute_cmd "INCR counter" "Incrementing the counter"
echo
execute_cmd "GET counter" "Getting the incremented value"
echo
print_header "Step 2: Hash Operations"
execute_cmd "HSET user:1 name 'John Doe' email 'john@example.com' age 30" "Setting hash fields"
echo
execute_cmd "HGET user:1 name" "Getting a specific field"
echo
execute_cmd "HGETALL user:1" "Getting all fields"
echo
execute_cmd "HLEN user:1" "Getting hash length"
echo
print_header "Step 3: List Operations"
execute_cmd "LPUSH tasks 'Write code' 'Test code' 'Deploy code'" "Adding items to list"
echo
execute_cmd "LLEN tasks" "Getting list length"
echo
execute_cmd "LRANGE tasks 0 -1" "Getting all list items"
echo
execute_cmd "LPOP tasks" "Popping from left"
echo
execute_cmd "LRANGE tasks 0 -1" "Checking remaining items"
echo
print_header "Step 4: Key Management"
execute_cmd "KEYS *" "Listing all keys"
echo
execute_cmd "EXISTS greeting" "Checking if key exists"
echo
execute_cmd "TYPE user:1" "Getting key type"
echo
execute_cmd "DBSIZE" "Getting database size"
echo
print_header "Step 5: Expiration"
execute_cmd "SET temp_key 'temporary value'" "Setting temporary key"
echo
execute_cmd "EXPIRE temp_key 5" "Setting 5 second expiration"
echo
execute_cmd "TTL temp_key" "Checking time to live"
echo
print_info "Waiting 2 seconds..."
sleep 2
execute_cmd "TTL temp_key" "Checking TTL again"
echo
print_header "Step 6: Multiple Operations"
execute_cmd "MSET key1 'value1' key2 'value2' key3 'value3'" "Setting multiple keys"
echo
execute_cmd "MGET key1 key2 key3" "Getting multiple values"
echo
execute_cmd "DEL key1 key2" "Deleting multiple keys"
echo
execute_cmd "EXISTS key1 key2 key3" "Checking existence of multiple keys"
echo
print_header "Step 7: Search Commands (Placeholder)"
print_info "Testing FT.CREATE command (currently returns placeholder response)"
execute_cmd "FT.CREATE test_index SCHEMA title TEXT description TEXT" "Creating search index"
echo
print_header "Step 8: Server Information"
execute_cmd "INFO" "Getting server information"
echo
execute_cmd "CONFIG GET dir" "Getting configuration"
echo
print_header "Step 9: Cleanup"
execute_cmd "FLUSHDB" "Clearing database"
echo
execute_cmd "DBSIZE" "Confirming database is empty"
echo
print_header "Demo Summary"
echo "This demonstration showed:"
echo "• Basic key-value operations (GET, SET, INCR)"
echo "• Hash operations (HSET, HGET, HGETALL)"
echo "• List operations (LPUSH, LPOP, LRANGE)"
echo "• Key management (KEYS, EXISTS, TYPE, DEL)"
echo "• Expiration handling (EXPIRE, TTL)"
echo "• Multiple key operations (MSET, MGET)"
echo "• Server information commands"
echo
print_success "HeroDB basic functionality demo completed successfully!"
echo
print_info "Note: Full-text search (FT.*) commands are defined but not yet fully implemented"
print_info "To run HeroDB server: cargo run -- --port 6381"
print_info "To connect with redis-cli: redis-cli -h localhost -p 6381"
}
# Run the demo
main "$@"

View File

@@ -1,28 +0,0 @@
[package]
name = "herodb"
version = "0.0.1"
authors = ["Pin Fang <fpfangpin@hotmail.com>"]
edition = "2021"
[dependencies]
anyhow = "1.0.59"
bytes = "1.3.0"
thiserror = "1.0.32"
tokio = { version = "1.23.0", features = ["full"] }
clap = { version = "4.5.20", features = ["derive"] }
byteorder = "1.4.3"
futures = "0.3"
redb = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
bincode = "1.3.3"
chacha20poly1305 = "0.10.1"
rand = "0.8"
sha2 = "0.10"
age = "0.10"
secrecy = "0.8"
ed25519-dalek = "2"
base64 = "0.22"
[dev-dependencies]
redis = { version = "0.24", features = ["aio", "tokio-comp"] }

View File

@@ -1,173 +0,0 @@
# HeroDB AGE usage: Stateless vs KeyManaged
This document explains how to use the AGE cryptography commands exposed by HeroDB over the Redis protocol in two modes:
- Stateless (ephemeral keys; nothing stored on the server)
- Keymanaged (serverpersisted, named keys)
If you are new to the codebase, the exact tests that exercise these behaviors are:
- [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495)
- [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555)
Implementation entry points:
- [herodb/src/age.rs](herodb/src/age.rs)
- Dispatch from [herodb/src/cmd.rs](herodb/src/cmd.rs)
Note: Database-at-rest encryption flags in the test harness are unrelated to AGE commands; those flags control storage-level encryption of DB files. See the harness near [rust.start_test_server()](herodb/tests/usage_suite.rs:10).
## Quick start
Assuming the server is running on localhost on some PORT:
```bash
# Generate an ephemeral keypair and encrypt/decrypt a message (stateless mode)
redis-cli -p PORT AGE GENENC
# → returns an array: [recipient, identity]
redis-cli -p PORT AGE ENCRYPT <recipient> "hello world"
# → returns ciphertext (base64 in a bulk string)
redis-cli -p PORT AGE DECRYPT <identity> <ciphertext_b64>
# → returns "hello world"
```
For keymanaged mode, generate a named key once and reference it by name afterwards:
```bash
redis-cli -p PORT AGE KEYGEN app1
# → persists encryption keypair under name "app1"
redis-cli -p PORT AGE ENCRYPTNAME app1 "hello"
redis-cli -p PORT AGE DECRYPTNAME app1 <ciphertext_b64>
```
## Stateless AGE (ephemeral)
Characteristics
- No serverside storage of keys.
- You pass the actual key material with every call.
- Not listable via AGE LIST.
Commands and examples
1) Ephemeral encryption keys
```bash
# Generate an ephemeral encryption keypair
redis-cli -p PORT AGE GENENC
# Example output (abridged):
# 1) "age1qz..." # recipient (public)
# 2) "AGE-SECRET-KEY-1..." # identity (secret)
# Encrypt with the recipient
redis-cli -p PORT AGE ENCRYPT "age1qz..." "hello world"
# → returns bulk string payload: base64 ciphertext
# Decrypt with the identity (secret)
redis-cli -p PORT AGE DECRYPT "AGE-SECRET-KEY-1..." "<ciphertext_b64>"
# → "hello world"
```
2) Ephemeral signing keys
```bash
# Generate an ephemeral signing keypair
redis-cli -p PORT AGE GENSIGN
# Example output:
# 1) "<verify_pub_b64>"
# 2) "<sign_secret_b64>"
# Sign a message with the secret
redis-cli -p PORT AGE SIGN "<sign_secret_b64>" "msg"
# → returns "<signature_b64>"
# Verify with the public key
redis-cli -p PORT AGE VERIFY "<verify_pub_b64>" "msg" "<signature_b64>"
# → 1 (valid) or 0 (invalid)
```
When to use
- You do not want the server to store private keys.
- You already manage key material on the client side.
- You need adhoc operations without persistence.
Reference test: [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495)
## Keymanaged AGE (persistent, named)
Characteristics
- Server generates and persists keypairs under a chosen name.
- Clients refer to keys by name; raw secrets are not supplied on each call.
- Keys are discoverable via AGE LIST.
Commands and examples
1) Named encryption keys
```bash
# Create/persist a named encryption keypair
redis-cli -p PORT AGE KEYGEN app1
# → returns [recipient, identity] but also stores them under name "app1"
# Encrypt using the stored public key
redis-cli -p PORT AGE ENCRYPTNAME app1 "hello"
# → returns bulk string payload: base64 ciphertext
# Decrypt using the stored secret
redis-cli -p PORT AGE DECRYPTNAME app1 "<ciphertext_b64>"
# → "hello"
```
2) Named signing keys
```bash
# Create/persist a named signing keypair
redis-cli -p PORT AGE SIGNKEYGEN app1
# → returns [verify_pub_b64, sign_secret_b64] and stores under name "app1"
# Sign using the stored secret
redis-cli -p PORT AGE SIGNNAME app1 "msg"
# → returns "<signature_b64>"
# Verify using the stored public key
redis-cli -p PORT AGE VERIFYNAME app1 "msg" "<signature_b64>"
# → 1 (valid) or 0 (invalid)
```
3) List stored AGE keys
```bash
redis-cli -p PORT AGE LIST
# Example output includes labels such as "encpub" and your key names (e.g., "app1")
```
When to use
- You want centralized key storage/rotation and fewer secrets on the client.
- You need names/labels for workflows and can trust the server with secrets.
- You want discoverability (AGE LIST) and simpler client commands.
Reference test: [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555)
## Choosing a mode
- Prefer Stateless when:
- Minimizing server trust for secret material is the priority.
- Clients already have a secure mechanism to store/distribute keys.
- Prefer Keymanaged when:
- Centralized lifecycle, naming, and discoverability are beneficial.
- You plan to integrate rotation, ACLs, or auditability on the server side.
## Security notes
- Treat identities and signing secrets as sensitive; avoid logging them.
- For keymanaged mode, ensure server storage (and backups) are protected.
- AGE operations here are applicationlevel crypto and are distinct from database-at-rest encryption configured in the test harness.
## Repository pointers
- Stateless examples in tests: [rust.test_07_age_stateless_suite()](herodb/tests/usage_suite.rs:495)
- Keymanaged examples in tests: [rust.test_08_age_persistent_named_suite()](herodb/tests/usage_suite.rs:555)
- AGE implementation: [herodb/src/age.rs](herodb/src/age.rs)
- Command dispatch: [herodb/src/cmd.rs](herodb/src/cmd.rs)
- Bash demo: [herodb/examples/age_bash_demo.sh](herodb/examples/age_bash_demo.sh)
- Rust persistent demo: [herodb/examples/age_persist_demo.rs](herodb/examples/age_persist_demo.rs)
- Additional notes: [herodb/instructions/encrypt.md](herodb/instructions/encrypt.md)

View File

@@ -1,308 +0,0 @@
//! age.rs — AGE (rage) helpers + persistent key management for your mini-Redis.
//
// Features:
// - X25519 encryption/decryption (age style)
// - Ed25519 detached signatures + verification
// - Persistent named keys in DB (strings):
// age:key:{name} -> X25519 recipient (public encryption key, "age1...")
// age:privkey:{name} -> X25519 identity (secret encryption key, "AGE-SECRET-KEY-1...")
// age:signpub:{name} -> Ed25519 verify pubkey (public, used to verify signatures)
// age:signpriv:{name} -> Ed25519 signing secret key (private, used to sign)
// - Base64 wrapping for ciphertext/signature binary blobs.
use std::str::FromStr;
use secrecy::ExposeSecret;
use age::{Decryptor, Encryptor};
use age::x25519;
use ed25519_dalek::{Signature, Signer, Verifier, SigningKey, VerifyingKey};
use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
use crate::protocol::Protocol;
use crate::server::Server;
use crate::error::DBError;
// ---------- Internal helpers ----------
#[derive(Debug)]
pub enum AgeWireError {
ParseKey,
Crypto(String),
Utf8,
SignatureLen,
NotFound(&'static str), // which kind of key was missing
Storage(String),
}
impl AgeWireError {
fn to_protocol(self) -> Protocol {
match self {
AgeWireError::ParseKey => Protocol::err("ERR age: invalid key"),
AgeWireError::Crypto(e) => Protocol::err(&format!("ERR age: {e}")),
AgeWireError::Utf8 => Protocol::err("ERR age: invalid UTF-8 plaintext"),
AgeWireError::SignatureLen => Protocol::err("ERR age: bad signature length"),
AgeWireError::NotFound(w) => Protocol::err(&format!("ERR age: missing {w}")),
AgeWireError::Storage(e) => Protocol::err(&format!("ERR storage: {e}")),
}
}
}
fn parse_recipient(s: &str) -> Result<x25519::Recipient, AgeWireError> {
x25519::Recipient::from_str(s).map_err(|_| AgeWireError::ParseKey)
}
fn parse_identity(s: &str) -> Result<x25519::Identity, AgeWireError> {
x25519::Identity::from_str(s).map_err(|_| AgeWireError::ParseKey)
}
fn parse_ed25519_signing_key(s: &str) -> Result<SigningKey, AgeWireError> {
// Parse base64-encoded signing key
let bytes = B64.decode(s).map_err(|_| AgeWireError::ParseKey)?;
if bytes.len() != 32 {
return Err(AgeWireError::ParseKey);
}
let key_bytes: [u8; 32] = bytes.try_into().map_err(|_| AgeWireError::ParseKey)?;
Ok(SigningKey::from_bytes(&key_bytes))
}
fn parse_ed25519_verifying_key(s: &str) -> Result<VerifyingKey, AgeWireError> {
// Parse base64-encoded verifying key
let bytes = B64.decode(s).map_err(|_| AgeWireError::ParseKey)?;
if bytes.len() != 32 {
return Err(AgeWireError::ParseKey);
}
let key_bytes: [u8; 32] = bytes.try_into().map_err(|_| AgeWireError::ParseKey)?;
VerifyingKey::from_bytes(&key_bytes).map_err(|_| AgeWireError::ParseKey)
}
// ---------- Stateless crypto helpers (string in/out) ----------
pub fn gen_enc_keypair() -> (String, String) {
let id = x25519::Identity::generate();
let pk = id.to_public();
(pk.to_string(), id.to_string().expose_secret().to_string()) // (recipient, identity)
}
pub fn gen_sign_keypair() -> (String, String) {
use rand::RngCore;
use rand::rngs::OsRng;
// Generate random 32 bytes for the signing key
let mut secret_bytes = [0u8; 32];
OsRng.fill_bytes(&mut secret_bytes);
let signing_key = SigningKey::from_bytes(&secret_bytes);
let verifying_key = signing_key.verifying_key();
// Encode as base64 for storage
let signing_key_b64 = B64.encode(signing_key.to_bytes());
let verifying_key_b64 = B64.encode(verifying_key.to_bytes());
(verifying_key_b64, signing_key_b64) // (verify_pub, signing_secret)
}
/// Encrypt `msg` for `recipient_str` (X25519). Returns base64(ciphertext).
pub fn encrypt_b64(recipient_str: &str, msg: &str) -> Result<String, AgeWireError> {
let recipient = parse_recipient(recipient_str)?;
let enc = Encryptor::with_recipients(vec![Box::new(recipient)])
.expect("failed to create encryptor"); // Handle Option<Encryptor>
let mut out = Vec::new();
{
use std::io::Write;
let mut w = enc.wrap_output(&mut out).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
w.write_all(msg.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
w.finish().map_err(|e| AgeWireError::Crypto(e.to_string()))?;
}
Ok(B64.encode(out))
}
/// Decrypt base64(ciphertext) with `identity_str`. Returns plaintext String.
pub fn decrypt_b64(identity_str: &str, ct_b64: &str) -> Result<String, AgeWireError> {
let id = parse_identity(identity_str)?;
let ct = B64.decode(ct_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
let dec = Decryptor::new(&ct[..]).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
// The decrypt method returns a Result<StreamReader, DecryptError>
let mut r = match dec {
Decryptor::Recipients(d) => d.decrypt(std::iter::once(&id as &dyn age::Identity))
.map_err(|e| AgeWireError::Crypto(e.to_string()))?,
Decryptor::Passphrase(_) => return Err(AgeWireError::Crypto("Expected recipients, got passphrase".to_string())),
};
let mut pt = Vec::new();
use std::io::Read;
r.read_to_end(&mut pt).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
String::from_utf8(pt).map_err(|_| AgeWireError::Utf8)
}
/// Sign bytes of `msg` (detached). Returns base64(signature bytes, 64 bytes).
pub fn sign_b64(signing_secret_str: &str, msg: &str) -> Result<String, AgeWireError> {
let signing_key = parse_ed25519_signing_key(signing_secret_str)?;
let sig = signing_key.sign(msg.as_bytes());
Ok(B64.encode(sig.to_bytes()))
}
/// Verify detached signature (base64) for `msg` with pubkey.
pub fn verify_b64(verify_pub_str: &str, msg: &str, sig_b64: &str) -> Result<bool, AgeWireError> {
let verifying_key = parse_ed25519_verifying_key(verify_pub_str)?;
let sig_bytes = B64.decode(sig_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
if sig_bytes.len() != 64 {
return Err(AgeWireError::SignatureLen);
}
let sig = Signature::from_bytes(sig_bytes[..].try_into().unwrap());
Ok(verifying_key.verify(msg.as_bytes(), &sig).is_ok())
}
// ---------- Storage helpers ----------
fn sget(server: &Server, key: &str) -> Result<Option<String>, AgeWireError> {
let st = server.current_storage().map_err(|e| AgeWireError::Storage(e.0))?;
st.get(key).map_err(|e| AgeWireError::Storage(e.0))
}
fn sset(server: &Server, key: &str, val: &str) -> Result<(), AgeWireError> {
let st = server.current_storage().map_err(|e| AgeWireError::Storage(e.0))?;
st.set(key.to_string(), val.to_string()).map_err(|e| AgeWireError::Storage(e.0))
}
fn enc_pub_key_key(name: &str) -> String { format!("age:key:{name}") }
fn enc_priv_key_key(name: &str) -> String { format!("age:privkey:{name}") }
fn sign_pub_key_key(name: &str) -> String { format!("age:signpub:{name}") }
fn sign_priv_key_key(name: &str) -> String { format!("age:signpriv:{name}") }
// ---------- Command handlers (RESP Protocol) ----------
// Basic (stateless) ones kept for completeness
pub async fn cmd_age_genenc() -> Protocol {
let (recip, ident) = gen_enc_keypair();
Protocol::Array(vec![Protocol::BulkString(recip), Protocol::BulkString(ident)])
}
pub async fn cmd_age_gensign() -> Protocol {
let (verify, secret) = gen_sign_keypair();
Protocol::Array(vec![Protocol::BulkString(verify), Protocol::BulkString(secret)])
}
pub async fn cmd_age_encrypt(recipient: &str, message: &str) -> Protocol {
match encrypt_b64(recipient, message) {
Ok(b64) => Protocol::BulkString(b64),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_decrypt(identity: &str, ct_b64: &str) -> Protocol {
match decrypt_b64(identity, ct_b64) {
Ok(pt) => Protocol::BulkString(pt),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_sign(secret: &str, message: &str) -> Protocol {
match sign_b64(secret, message) {
Ok(b64sig) => Protocol::BulkString(b64sig),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_verify(verify_pub: &str, message: &str, sig_b64: &str) -> Protocol {
match verify_b64(verify_pub, message, sig_b64) {
Ok(true) => Protocol::SimpleString("1".to_string()),
Ok(false) => Protocol::SimpleString("0".to_string()),
Err(e) => e.to_protocol(),
}
}
// ---------- NEW: Persistent, named-key commands ----------
pub async fn cmd_age_keygen(server: &Server, name: &str) -> Protocol {
let (recip, ident) = gen_enc_keypair();
if let Err(e) = sset(server, &enc_pub_key_key(name), &recip) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_priv_key_key(name), &ident) { return e.to_protocol(); }
Protocol::Array(vec![Protocol::BulkString(recip), Protocol::BulkString(ident)])
}
pub async fn cmd_age_signkeygen(server: &Server, name: &str) -> Protocol {
let (verify, secret) = gen_sign_keypair();
if let Err(e) = sset(server, &sign_pub_key_key(name), &verify) { return e.to_protocol(); }
if let Err(e) = sset(server, &sign_priv_key_key(name), &secret) { return e.to_protocol(); }
Protocol::Array(vec![Protocol::BulkString(verify), Protocol::BulkString(secret)])
}
pub async fn cmd_age_encrypt_name(server: &Server, name: &str, message: &str) -> Protocol {
let recip = match sget(server, &enc_pub_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => return AgeWireError::NotFound("recipient (age:key:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
};
match encrypt_b64(&recip, message) {
Ok(ct) => Protocol::BulkString(ct),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_decrypt_name(server: &Server, name: &str, ct_b64: &str) -> Protocol {
let ident = match sget(server, &enc_priv_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => return AgeWireError::NotFound("identity (age:privkey:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
};
match decrypt_b64(&ident, ct_b64) {
Ok(pt) => Protocol::BulkString(pt),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_sign_name(server: &Server, name: &str, message: &str) -> Protocol {
let sec = match sget(server, &sign_priv_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => return AgeWireError::NotFound("signing secret (age:signpriv:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
};
match sign_b64(&sec, message) {
Ok(sig) => Protocol::BulkString(sig),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_verify_name(server: &Server, name: &str, message: &str, sig_b64: &str) -> Protocol {
let pubk = match sget(server, &sign_pub_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => return AgeWireError::NotFound("verify pubkey (age:signpub:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
};
match verify_b64(&pubk, message, sig_b64) {
Ok(true) => Protocol::SimpleString("1".to_string()),
Ok(false) => Protocol::SimpleString("0".to_string()),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_list(server: &Server) -> Protocol {
// Returns 4 arrays: ["encpub", <names...>], ["encpriv", ...], ["signpub", ...], ["signpriv", ...]
let st = match server.current_storage() { Ok(s) => s, Err(e) => return Protocol::err(&e.0) };
let pull = |pat: &str, prefix: &str| -> Result<Vec<String>, DBError> {
let keys = st.keys(pat)?;
let mut names: Vec<String> = keys.into_iter()
.filter_map(|k| k.strip_prefix(prefix).map(|x| x.to_string()))
.collect();
names.sort();
Ok(names)
};
let encpub = match pull("age:key:*", "age:key:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let encpriv = match pull("age:privkey:*", "age:privkey:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let signpub = match pull("age:signpub:*", "age:signpub:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let signpriv= match pull("age:signpriv:*", "age:signpriv:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let to_arr = |label: &str, v: Vec<String>| {
let mut out = vec![Protocol::BulkString(label.to_string())];
out.push(Protocol::Array(v.into_iter().map(Protocol::BulkString).collect()));
Protocol::Array(out)
};
Protocol::Array(vec![
to_arr("encpub", encpub),
to_arr("encpriv", encpriv),
to_arr("signpub", signpub),
to_arr("signpriv", signpriv),
])
}

View File

@@ -1,8 +0,0 @@
pub mod age; // NEW
pub mod cmd;
pub mod crypto;
pub mod error;
pub mod options;
pub mod protocol;
pub mod server;
pub mod storage;

View File

@@ -1,81 +0,0 @@
// #![allow(unused_imports)]
use tokio::net::TcpListener;
use herodb::server;
use clap::Parser;
/// Simple program to greet a person
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
/// The directory of Redis DB file
#[arg(long)]
dir: String,
/// The port of the Redis server, default is 6379 if not specified
#[arg(long)]
port: Option<u16>,
/// Enable debug mode
#[arg(long)]
debug: bool,
/// Master encryption key for encrypted databases
#[arg(long)]
encryption_key: Option<String>,
/// Encrypt the database
#[arg(long)]
encrypt: bool,
}
#[tokio::main]
async fn main() {
// parse args
let args = Args::parse();
// bind port
let port = args.port.unwrap_or(6379);
println!("will listen on port: {}", port);
let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
.await
.unwrap();
// new DB option
let option = herodb::options::DBOption {
dir: args.dir,
port,
debug: args.debug,
encryption_key: args.encryption_key,
encrypt: args.encrypt,
};
// new server
let server = server::Server::new(option).await;
// Add a small delay to ensure the port is ready
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
// accept new connections
loop {
let stream = listener.accept().await;
match stream {
Ok((stream, _)) => {
println!("accepted new connection");
let mut sc = server.clone();
tokio::spawn(async move {
if let Err(e) = sc.handle(stream).await {
println!("error: {:?}, will close the connection. Bye", e);
}
});
}
Err(e) => {
println!("error: {}", e);
}
}
}
}

View File

@@ -1,8 +0,0 @@
#[derive(Clone)]
pub struct DBOption {
pub dir: String,
pub port: u16,
pub debug: bool,
pub encrypt: bool,
pub encryption_key: Option<String>, // Master encryption key
}

View File

@@ -1,126 +0,0 @@
use std::{
path::Path,
time::{SystemTime, UNIX_EPOCH},
};
use redb::{Database, TableDefinition};
use serde::{Deserialize, Serialize};
use crate::crypto::CryptoFactory;
use crate::error::DBError;
// Re-export modules
mod storage_basic;
mod storage_hset;
mod storage_lists;
mod storage_extra;
// Re-export implementations
// Note: These imports are used by the impl blocks in the submodules
// The compiler shows them as unused because they're not directly used in this file
// but they're needed for the Storage struct methods to be available
pub use storage_extra::*;
// Table definitions for different Redis data types
const TYPES_TABLE: TableDefinition<&str, &str> = TableDefinition::new("types");
const STRINGS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("strings");
const HASHES_TABLE: TableDefinition<(&str, &str), &[u8]> = TableDefinition::new("hashes");
const LISTS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("lists");
const STREAMS_META_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("streams_meta");
const STREAMS_DATA_TABLE: TableDefinition<(&str, &str), &[u8]> = TableDefinition::new("streams_data");
const ENCRYPTED_TABLE: TableDefinition<&str, u8> = TableDefinition::new("encrypted");
const EXPIRATION_TABLE: TableDefinition<&str, u64> = TableDefinition::new("expiration");
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct StreamEntry {
pub fields: Vec<(String, String)>,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ListValue {
pub elements: Vec<String>,
}
#[inline]
pub fn now_in_millis() -> u128 {
let start = SystemTime::now();
let duration_since_epoch = start.duration_since(UNIX_EPOCH).unwrap();
duration_since_epoch.as_millis()
}
pub struct Storage {
db: Database,
crypto: Option<CryptoFactory>,
}
impl Storage {
pub fn new(path: impl AsRef<Path>, should_encrypt: bool, master_key: Option<&str>) -> Result<Self, DBError> {
let db = Database::create(path)?;
// Create tables if they don't exist
let write_txn = db.begin_write()?;
{
let _ = write_txn.open_table(TYPES_TABLE)?;
let _ = write_txn.open_table(STRINGS_TABLE)?;
let _ = write_txn.open_table(HASHES_TABLE)?;
let _ = write_txn.open_table(LISTS_TABLE)?;
let _ = write_txn.open_table(STREAMS_META_TABLE)?;
let _ = write_txn.open_table(STREAMS_DATA_TABLE)?;
let _ = write_txn.open_table(ENCRYPTED_TABLE)?;
let _ = write_txn.open_table(EXPIRATION_TABLE)?;
}
write_txn.commit()?;
// Check if database was previously encrypted
let read_txn = db.begin_read()?;
let encrypted_table = read_txn.open_table(ENCRYPTED_TABLE)?;
let was_encrypted = encrypted_table.get("encrypted")?.map(|v| v.value() == 1).unwrap_or(false);
drop(read_txn);
let crypto = if should_encrypt || was_encrypted {
if let Some(key) = master_key {
Some(CryptoFactory::new(key.as_bytes()))
} else {
return Err(DBError("Encryption requested but no master key provided".to_string()));
}
} else {
None
};
// If we're enabling encryption for the first time, mark it
if should_encrypt && !was_encrypted {
let write_txn = db.begin_write()?;
{
let mut encrypted_table = write_txn.open_table(ENCRYPTED_TABLE)?;
encrypted_table.insert("encrypted", &1u8)?;
}
write_txn.commit()?;
}
Ok(Storage {
db,
crypto,
})
}
pub fn is_encrypted(&self) -> bool {
self.crypto.is_some()
}
// Helper methods for encryption
fn encrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
if let Some(crypto) = &self.crypto {
Ok(crypto.encrypt(data))
} else {
Ok(data.to_vec())
}
}
fn decrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
if let Some(crypto) = &self.crypto {
Ok(crypto.decrypt(data)?)
} else {
Ok(data.to_vec())
}
}
}

143
run.sh Executable file
View File

@@ -0,0 +1,143 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
# Test script for HeroDB - Redis-compatible database with redb backend
# This script starts the server and runs comprehensive tests
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Configuration
DB_DIR="/tmp/test_db"
PORT=6381
SERVER_PID=""
# Function to print colored output
print_status() {
echo -e "${BLUE}[INFO]${NC} $1"
}
print_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
print_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
# Function to cleanup on exit
cleanup() {
if [ ! -z "$SERVER_PID" ]; then
print_status "Stopping HeroDB server (PID: $SERVER_PID)..."
kill $SERVER_PID 2>/dev/null || true
wait $SERVER_PID 2>/dev/null || true
fi
# Clean up test database
if [ -d "$DB_DIR" ]; then
print_status "Cleaning up test database directory..."
rm -rf "$DB_DIR"
fi
}
# Set trap to cleanup on script exit
trap cleanup EXIT
# Function to wait for server to start
wait_for_server() {
local max_attempts=30
local attempt=1
print_status "Waiting for server to start on port $PORT..."
while [ $attempt -le $max_attempts ]; do
if nc -z localhost $PORT 2>/dev/null; then
print_success "Server is ready!"
return 0
fi
echo -n "."
sleep 1
attempt=$((attempt + 1))
done
print_error "Server failed to start within $max_attempts seconds"
return 1
}
# Function to send Redis command and get response
redis_cmd() {
local cmd="$1"
local expected="$2"
print_status "Testing: $cmd"
local result=$(echo "$cmd" | redis-cli -p $PORT --raw 2>/dev/null || echo "ERROR")
if [ "$expected" != "" ] && [ "$result" != "$expected" ]; then
print_error "Expected: '$expected', Got: '$result'"
return 1
else
print_success "$cmd -> $result"
return 0
fi
}
# Main execution
main() {
print_status "Starting HeroDB"
# Build the project
print_status "Building HeroDB..."
if ! cargo build -p herodb --release; then
print_error "Failed to build HeroDB"
exit 1
fi
# Create test database directory
mkdir -p "$DB_DIR"
# Start the server
print_status "Starting HeroDB server..."
${SCRIPT_DIR}/target/release/herodb --dir "$DB_DIR" --port $PORT &
SERVER_PID=$!
# Wait for server to start
if ! wait_for_server; then
print_error "Failed to start server"
exit 1
fi
}
# Check dependencies
check_dependencies() {
if ! command -v cargo &> /dev/null; then
print_error "cargo is required but not installed"
exit 1
fi
if ! command -v nc &> /dev/null; then
print_warning "netcat (nc) not found - some tests may not work properly"
fi
if ! command -v redis-cli &> /dev/null; then
print_warning "redis-cli not found - using netcat fallback"
fi
}
# Run dependency check and main function
check_dependencies
main "$@"
tail -f /dev/null

View File

@@ -1,4 +1,7 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
echo "🧪 Running HeroDB Redis Compatibility Tests"
echo "=========================================="

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,113 @@
========================
CODE SNIPPETS
========================
TITLE: Basic Database Operations with sled in Rust
DESCRIPTION: This snippet demonstrates fundamental operations using the `sled` embedded database in Rust. It covers opening a database tree, inserting and retrieving key-value pairs, performing range queries, deleting entries, and executing an atomic compare-and-swap operation. It also shows how to flush changes to disk for durability.
SOURCE: https://github.com/spacejam/sled/blob/main/README.md#_snippet_0
LANGUAGE: Rust
CODE:
```
let tree = sled::open("/tmp/welcome-to-sled")?;
// insert and get, similar to std's BTreeMap
let old_value = tree.insert("key", "value")?;
assert_eq!(
tree.get(&"key")?,
Some(sled::IVec::from("value")),
);
// range queries
for kv_result in tree.range("key_1".."key_9") {}
// deletion
let old_value = tree.remove(&"key")?;
// atomic compare and swap
tree.compare_and_swap(
"key",
Some("current_value"),
Some("new_value"),
)?;
// block until all operations are stable on disk
// (flush_async also available to get a Future)
tree.flush()?;
```
----------------------------------------
TITLE: Subscribing to sled Events Asynchronously (Rust)
DESCRIPTION: This snippet demonstrates how to asynchronously subscribe to events on key prefixes in a `sled` database. It initializes a `sled` database, creates a `Subscriber` for all key prefixes, inserts a key-value pair to trigger an event, and then uses `extreme::run` to await and process incoming events. The `Subscriber` struct implements `Future<Output=Option<Event>>`, allowing it to be awaited in an async context.
SOURCE: https://github.com/spacejam/sled/blob/main/README.md#_snippet_1
LANGUAGE: Rust
CODE:
```
let sled = sled::open("my_db").unwrap();
let mut sub = sled.watch_prefix("");
sled.insert(b"a", b"a").unwrap();
extreme::run(async move {
while let Some(event) = (&mut sub).await {
println!("got event {:?}", event);
}
});
```
----------------------------------------
TITLE: Iterating Subscriber Events with Async/Await in Rust
DESCRIPTION: This snippet demonstrates how to asynchronously iterate over events from a `Subscriber` instance in Rust. Since `Subscriber` now implements `Future`, it can be awaited in a loop to process incoming events, enabling efficient prefix watching. The loop continues as long as new events are available.
SOURCE: https://github.com/spacejam/sled/blob/main/CHANGELOG.md#_snippet_0
LANGUAGE: Rust
CODE:
```
while let Some(event) = (&mut subscriber).await {}
```
----------------------------------------
TITLE: Suppressing TSAN Race on Arc::drop in Rust
DESCRIPTION: This suppression addresses a false positive race detection by ThreadSanitizer in Rust's `Arc::drop` implementation. TSAN fails to correctly reason about the raw atomic `Acquire` fence used after the strong-count atomic subtraction with a `Release` fence in the `Drop` implementation, leading to an erroneous race report.
SOURCE: https://github.com/spacejam/sled/blob/main/tsan_suppressions.txt#_snippet_0
LANGUAGE: TSAN Suppression
CODE:
```
race:Arc*drop
```
----------------------------------------
TITLE: Suppressing TSAN Race on std::thread::local in Rust
DESCRIPTION: This suppression addresses ThreadSanitizer false positives when using Rust's `std::thread::local`. Similar to `lazy_static`, `std::thread::local` utilizes implicit barriers that TSAN fails to recognize, leading to incorrect race condition reports.
SOURCE: https://github.com/spacejam/sled/blob/main/tsan_suppressions.txt#_snippet_2
LANGUAGE: TSAN Suppression
CODE:
```
race:std::thread::local
```
----------------------------------------
TITLE: Suppressing TSAN Race on lazy_static in Rust
DESCRIPTION: This suppression targets ThreadSanitizer false positives related to the `lazy_static` crate in Rust. `lazy_static` relies on implicit memory barriers that TSAN does not correctly detect, causing it to report races where none exist.
SOURCE: https://github.com/spacejam/sled/blob/main/tsan_suppressions.txt#_snippet_1
LANGUAGE: TSAN Suppression
CODE:
```
race:lazy_static
```

View File

@@ -0,0 +1,735 @@
Looking at your request to ensure embedding happens with an external embedding provider configured in Redis at `config:core:aiembed:url`, I'll modify the implementation to use an external embedding service instead of local models.
## Updated Implementation with External Embedding Provider
### 1. Update Dependencies in `Cargo.toml`
```toml
[dependencies]
# ... existing dependencies ...
lance = "0.18"
lance-index = "0.18"
lance-linalg = "0.18"
arrow = "52"
arrow-array = "52"
arrow-schema = "52"
parquet = "52"
uuid = { version = "1.10", features = ["v4"] }
reqwest = { version = "0.11", features = ["json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
base64 = "0.22"
image = "0.25"
```
### 2. Create Enhanced Lance Module with External Embedding
Create `src/lance_store.rs`:
```rust
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::RwLock;
use arrow::array::{Float32Array, StringArray, BinaryArray, ArrayRef};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use lance::dataset::{Dataset, WriteParams, WriteMode};
use lance::index::vector::VectorIndexParams;
use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::ivf::IvfBuildParams;
use serde::{Deserialize, Serialize};
use crate::error::DBError;
use crate::cmd::Protocol;
#[derive(Debug, Serialize, Deserialize)]
struct EmbeddingRequest {
texts: Option<Vec<String>>,
images: Option<Vec<String>>, // base64 encoded
model: Option<String>,
}
#[derive(Debug, Serialize, Deserialize)]
struct EmbeddingResponse {
embeddings: Vec<Vec<f32>>,
model: String,
usage: Option<HashMap<String, u32>>,
}
pub struct LanceStore {
datasets: Arc<RwLock<HashMap<String, Arc<Dataset>>>>,
data_dir: PathBuf,
http_client: reqwest::Client,
}
impl LanceStore {
pub async fn new(data_dir: PathBuf) -> Result<Self, DBError> {
// Create data directory if it doesn't exist
std::fs::create_dir_all(&data_dir)
.map_err(|e| DBError(format!("Failed to create Lance data directory: {}", e)))?;
let http_client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(30))
.build()
.map_err(|e| DBError(format!("Failed to create HTTP client: {}", e)))?;
Ok(Self {
datasets: Arc::new(RwLock::new(HashMap::new())),
data_dir,
http_client,
})
}
/// Get embedding service URL from Redis config
async fn get_embedding_url(&self, server: &crate::server::Server) -> Result<String, DBError> {
// Get the embedding URL from Redis config
let key = "config:core:aiembed:url";
// Use HGET to retrieve the URL from Redis hash
let cmd = crate::cmd::Cmd::HGet {
key: key.to_string(),
field: "url".to_string(),
};
// Execute command to get the config
let result = cmd.run(server).await?;
match result {
Protocol::BulkString(url) => Ok(url),
Protocol::SimpleString(url) => Ok(url),
Protocol::Nil => Err(DBError(
"Embedding service URL not configured. Set it with: HSET config:core:aiembed:url url <YOUR_EMBEDDING_SERVICE_URL>".to_string()
)),
_ => Err(DBError("Invalid embedding URL configuration".to_string())),
}
}
/// Call external embedding service
async fn call_embedding_service(
&self,
server: &crate::server::Server,
texts: Option<Vec<String>>,
images: Option<Vec<String>>,
) -> Result<Vec<Vec<f32>>, DBError> {
let url = self.get_embedding_url(server).await?;
let request = EmbeddingRequest {
texts,
images,
model: None, // Let the service use its default
};
let response = self.http_client
.post(&url)
.json(&request)
.send()
.await
.map_err(|e| DBError(format!("Failed to call embedding service: {}", e)))?;
if !response.status().is_success() {
let status = response.status();
let error_text = response.text().await.unwrap_or_default();
return Err(DBError(format!(
"Embedding service returned error {}: {}",
status, error_text
)));
}
let embedding_response: EmbeddingResponse = response
.json()
.await
.map_err(|e| DBError(format!("Failed to parse embedding response: {}", e)))?;
Ok(embedding_response.embeddings)
}
pub async fn embed_text(
&self,
server: &crate::server::Server,
texts: Vec<String>
) -> Result<Vec<Vec<f32>>, DBError> {
if texts.is_empty() {
return Ok(Vec::new());
}
self.call_embedding_service(server, Some(texts), None).await
}
pub async fn embed_image(
&self,
server: &crate::server::Server,
image_bytes: Vec<u8>
) -> Result<Vec<f32>, DBError> {
// Convert image bytes to base64
let base64_image = base64::encode(&image_bytes);
let embeddings = self.call_embedding_service(
server,
None,
Some(vec![base64_image])
).await?;
embeddings.into_iter()
.next()
.ok_or_else(|| DBError("No embedding returned for image".to_string()))
}
pub async fn create_dataset(
&self,
name: &str,
schema: Schema,
) -> Result<(), DBError> {
let dataset_path = self.data_dir.join(format!("{}.lance", name));
// Create empty dataset with schema
let write_params = WriteParams {
mode: WriteMode::Create,
..Default::default()
};
// Create an empty RecordBatch with the schema
let empty_batch = RecordBatch::new_empty(Arc::new(schema));
let batches = vec![empty_batch];
let dataset = Dataset::write(
batches,
dataset_path.to_str().unwrap(),
Some(write_params)
).await
.map_err(|e| DBError(format!("Failed to create dataset: {}", e)))?;
let mut datasets = self.datasets.write().await;
datasets.insert(name.to_string(), Arc::new(dataset));
Ok(())
}
pub async fn write_vectors(
&self,
dataset_name: &str,
vectors: Vec<Vec<f32>>,
metadata: Option<HashMap<String, Vec<String>>>,
) -> Result<usize, DBError> {
let dataset_path = self.data_dir.join(format!("{}.lance", dataset_name));
// Open or get cached dataset
let dataset = self.get_or_open_dataset(dataset_name).await?;
// Build RecordBatch
let num_vectors = vectors.len();
if num_vectors == 0 {
return Ok(0);
}
let dim = vectors.first()
.ok_or_else(|| DBError("Empty vectors".to_string()))?
.len();
// Flatten vectors
let flat_vectors: Vec<f32> = vectors.into_iter().flatten().collect();
let vector_array = Float32Array::from(flat_vectors);
let vector_array = arrow::array::FixedSizeListArray::try_new_from_values(
vector_array,
dim as i32
).map_err(|e| DBError(format!("Failed to create vector array: {}", e)))?;
let mut arrays: Vec<ArrayRef> = vec![Arc::new(vector_array)];
let mut fields = vec![Field::new(
"vector",
DataType::FixedSizeList(
Arc::new(Field::new("item", DataType::Float32, true)),
dim as i32
),
false
)];
// Add metadata columns if provided
if let Some(metadata) = metadata {
for (key, values) in metadata {
if values.len() != num_vectors {
return Err(DBError(format!(
"Metadata field '{}' has {} values but expected {}",
key, values.len(), num_vectors
)));
}
let array = StringArray::from(values);
arrays.push(Arc::new(array));
fields.push(Field::new(&key, DataType::Utf8, true));
}
}
let schema = Arc::new(Schema::new(fields));
let batch = RecordBatch::try_new(schema, arrays)
.map_err(|e| DBError(format!("Failed to create RecordBatch: {}", e)))?;
// Append to dataset
let write_params = WriteParams {
mode: WriteMode::Append,
..Default::default()
};
Dataset::write(
vec![batch],
dataset_path.to_str().unwrap(),
Some(write_params)
).await
.map_err(|e| DBError(format!("Failed to write to dataset: {}", e)))?;
// Refresh cached dataset
let mut datasets = self.datasets.write().await;
datasets.remove(dataset_name);
Ok(num_vectors)
}
pub async fn search_vectors(
&self,
dataset_name: &str,
query_vector: Vec<f32>,
k: usize,
nprobes: Option<usize>,
refine_factor: Option<usize>,
) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
let dataset = self.get_or_open_dataset(dataset_name).await?;
// Build query
let mut query = dataset.scan();
query = query.nearest(
"vector",
&query_vector,
k,
).map_err(|e| DBError(format!("Failed to build search query: {}", e)))?;
if let Some(nprobes) = nprobes {
query = query.nprobes(nprobes);
}
if let Some(refine) = refine_factor {
query = query.refine_factor(refine);
}
// Execute search
let results = query
.try_into_stream()
.await
.map_err(|e| DBError(format!("Failed to execute search: {}", e)))?
.try_collect::<Vec<_>>()
.await
.map_err(|e| DBError(format!("Failed to collect results: {}", e)))?;
// Process results
let mut output = Vec::new();
for batch in results {
// Get distances
let distances = batch
.column_by_name("_distance")
.ok_or_else(|| DBError("No distance column".to_string()))?
.as_any()
.downcast_ref::<Float32Array>()
.ok_or_else(|| DBError("Invalid distance type".to_string()))?;
// Get metadata
for i in 0..batch.num_rows() {
let distance = distances.value(i);
let mut metadata = HashMap::new();
for field in batch.schema().fields() {
if field.name() != "vector" && field.name() != "_distance" {
if let Some(col) = batch.column_by_name(field.name()) {
if let Some(str_array) = col.as_any().downcast_ref::<StringArray>() {
if !str_array.is_null(i) {
metadata.insert(
field.name().to_string(),
str_array.value(i).to_string()
);
}
}
}
}
}
output.push((distance, metadata));
}
}
Ok(output)
}
pub async fn store_multimodal(
&self,
server: &crate::server::Server,
dataset_name: &str,
text: Option<String>,
image_bytes: Option<Vec<u8>>,
metadata: HashMap<String, String>,
) -> Result<String, DBError> {
// Generate ID
let id = uuid::Uuid::new_v4().to_string();
// Generate embeddings using external service
let embedding = if let Some(text) = text.as_ref() {
self.embed_text(server, vec![text.clone()]).await?
.into_iter()
.next()
.ok_or_else(|| DBError("No embedding returned".to_string()))?
} else if let Some(img) = image_bytes.as_ref() {
self.embed_image(server, img.clone()).await?
} else {
return Err(DBError("No text or image provided".to_string()));
};
// Prepare metadata
let mut full_metadata = metadata;
full_metadata.insert("id".to_string(), id.clone());
if let Some(text) = text {
full_metadata.insert("text".to_string(), text);
}
if let Some(img) = image_bytes {
full_metadata.insert("image_base64".to_string(), base64::encode(img));
}
// Convert metadata to column vectors
let mut metadata_cols = HashMap::new();
for (key, value) in full_metadata {
metadata_cols.insert(key, vec![value]);
}
// Write to dataset
self.write_vectors(dataset_name, vec![embedding], Some(metadata_cols)).await?;
Ok(id)
}
pub async fn search_with_text(
&self,
server: &crate::server::Server,
dataset_name: &str,
query_text: String,
k: usize,
nprobes: Option<usize>,
refine_factor: Option<usize>,
) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
// Embed the query text using external service
let embeddings = self.embed_text(server, vec![query_text]).await?;
let query_vector = embeddings.into_iter()
.next()
.ok_or_else(|| DBError("No embedding returned for query".to_string()))?;
// Search with the embedding
self.search_vectors(dataset_name, query_vector, k, nprobes, refine_factor).await
}
pub async fn create_index(
&self,
dataset_name: &str,
index_type: &str,
num_partitions: Option<usize>,
num_sub_vectors: Option<usize>,
) -> Result<(), DBError> {
let dataset = self.get_or_open_dataset(dataset_name).await?;
let mut params = VectorIndexParams::default();
match index_type.to_uppercase().as_str() {
"IVF_PQ" => {
params.ivf = IvfBuildParams {
num_partitions: num_partitions.unwrap_or(256),
..Default::default()
};
params.pq = PQBuildParams {
num_sub_vectors: num_sub_vectors.unwrap_or(16),
..Default::default()
};
}
_ => return Err(DBError(format!("Unsupported index type: {}", index_type))),
}
dataset.create_index(
&["vector"],
lance::index::IndexType::Vector,
None,
&params,
true
).await
.map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
Ok(())
}
async fn get_or_open_dataset(&self, name: &str) -> Result<Arc<Dataset>, DBError> {
let mut datasets = self.datasets.write().await;
if let Some(dataset) = datasets.get(name) {
return Ok(dataset.clone());
}
let dataset_path = self.data_dir.join(format!("{}.lance", name));
if !dataset_path.exists() {
return Err(DBError(format!("Dataset '{}' does not exist", name)));
}
let dataset = Dataset::open(dataset_path.to_str().unwrap())
.await
.map_err(|e| DBError(format!("Failed to open dataset: {}", e)))?;
let dataset = Arc::new(dataset);
datasets.insert(name.to_string(), dataset.clone());
Ok(dataset)
}
pub async fn list_datasets(&self) -> Result<Vec<String>, DBError> {
let mut datasets = Vec::new();
let entries = std::fs::read_dir(&self.data_dir)
.map_err(|e| DBError(format!("Failed to read data directory: {}", e)))?;
for entry in entries {
let entry = entry.map_err(|e| DBError(format!("Failed to read entry: {}", e)))?;
let path = entry.path();
if path.is_dir() {
if let Some(name) = path.file_name() {
if let Some(name_str) = name.to_str() {
if name_str.ends_with(".lance") {
let dataset_name = name_str.trim_end_matches(".lance");
datasets.push(dataset_name.to_string());
}
}
}
}
}
Ok(datasets)
}
pub async fn drop_dataset(&self, name: &str) -> Result<(), DBError> {
// Remove from cache
let mut datasets = self.datasets.write().await;
datasets.remove(name);
// Delete from disk
let dataset_path = self.data_dir.join(format!("{}.lance", name));
if dataset_path.exists() {
std::fs::remove_dir_all(dataset_path)
.map_err(|e| DBError(format!("Failed to delete dataset: {}", e)))?;
}
Ok(())
}
pub async fn get_dataset_info(&self, name: &str) -> Result<HashMap<String, String>, DBError> {
let dataset = self.get_or_open_dataset(name).await?;
let mut info = HashMap::new();
info.insert("name".to_string(), name.to_string());
info.insert("version".to_string(), dataset.version().to_string());
info.insert("num_rows".to_string(), dataset.count_rows().await?.to_string());
// Get schema info
let schema = dataset.schema();
let fields: Vec<String> = schema.fields()
.iter()
.map(|f| format!("{}:{}", f.name(), f.data_type()))
.collect();
info.insert("schema".to_string(), fields.join(", "));
Ok(info)
}
}
```
### 3. Update Command Implementations
Update the command implementations to pass the server reference for embedding service access:
```rust
// In cmd.rs, update the lance command implementations
async fn lance_store_cmd(
server: &Server,
dataset: &str,
text: Option<String>,
image_base64: Option<String>,
metadata: HashMap<String, String>,
) -> Result<Protocol, DBError> {
let lance_store = server.lance_store()?;
// Decode image if provided
let image_bytes = if let Some(b64) = image_base64 {
Some(base64::decode(b64).map_err(|e|
DBError(format!("Invalid base64 image: {}", e)))?)
} else {
None
};
// Pass server reference for embedding service access
let id = lance_store.store_multimodal(
server, // Pass server to access Redis config
dataset,
text,
image_bytes,
metadata,
).await?;
Ok(Protocol::BulkString(id))
}
async fn lance_embed_text_cmd(
server: &Server,
texts: &[String],
) -> Result<Protocol, DBError> {
let lance_store = server.lance_store()?;
// Pass server reference for embedding service access
let embeddings = lance_store.embed_text(server, texts.to_vec()).await?;
// Return as array of vectors
let mut output = Vec::new();
for embedding in embeddings {
let vector_str = format!("[{}]",
embedding.iter()
.map(|f| f.to_string())
.collect::<Vec<_>>()
.join(",")
);
output.push(Protocol::BulkString(vector_str));
}
Ok(Protocol::Array(output))
}
async fn lance_search_text_cmd(
server: &Server,
dataset: &str,
query_text: &str,
k: usize,
nprobes: Option<usize>,
refine_factor: Option<usize>,
) -> Result<Protocol, DBError> {
let lance_store = server.lance_store()?;
// Search using text query (will be embedded automatically)
let results = lance_store.search_with_text(
server,
dataset,
query_text.to_string(),
k,
nprobes,
refine_factor,
).await?;
// Format results
let mut output = Vec::new();
for (distance, metadata) in results {
let metadata_json = serde_json::to_string(&metadata)
.unwrap_or_else(|_| "{}".to_string());
output.push(Protocol::Array(vec![
Protocol::BulkString(distance.to_string()),
Protocol::BulkString(metadata_json),
]));
}
Ok(Protocol::Array(output))
}
// Add new command for text-based search
pub enum Cmd {
// ... existing commands ...
LanceSearchText {
dataset: String,
query_text: String,
k: usize,
nprobes: Option<usize>,
refine_factor: Option<usize>,
},
}
```
## Usage Examples
### 1. Configure the Embedding Service
First, users need to configure the embedding service URL:
```bash
# Configure the embedding service endpoint
redis-cli> HSET config:core:aiembed:url url "http://localhost:8000/embeddings"
OK
# Or use a cloud service
redis-cli> HSET config:core:aiembed:url url "https://api.openai.com/v1/embeddings"
OK
```
### 2. Use Lance Commands with Automatic External Embedding
```bash
# Create a dataset
redis-cli> LANCE.CREATE products DIM 1536 SCHEMA name:string price:float category:string
OK
# Store text with automatic embedding (calls external service)
redis-cli> LANCE.STORE products TEXT "Wireless noise-canceling headphones with 30-hour battery" name:AirPods price:299.99 category:Electronics
"uuid-123-456"
# Search using text query (automatically embeds the query)
redis-cli> LANCE.SEARCH.TEXT products "best headphones for travel" K 5
1) "0.92"
2) "{\"id\":\"uuid-123\",\"name\":\"AirPods\",\"price\":\"299.99\"}"
# Get embeddings directly
redis-cli> LANCE.EMBED.TEXT "This text will be embedded"
1) "[0.123, 0.456, 0.789, ...]"
```
## External Embedding Service API Specification
The external embedding service should accept POST requests with this format:
```json
// Request
{
"texts": ["text1", "text2"], // Optional
"images": ["base64_img1"], // Optional
"model": "text-embedding-ada-002" // Optional
}
// Response
{
"embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
"model": "text-embedding-ada-002",
"usage": {
"prompt_tokens": 100,
"total_tokens": 100
}
}
```
## Error Handling
The implementation includes comprehensive error handling:
1. **Missing Configuration**: Clear error message if embedding URL not configured
2. **Service Failures**: Graceful handling of embedding service errors
3. **Timeout Protection**: 30-second timeout for embedding requests
4. **Retry Logic**: Could be added for resilience
## Benefits of This Approach
1. **Flexibility**: Supports any embedding service with compatible API
2. **Cost Control**: Use your preferred embedding provider
3. **Scalability**: Embedding service can be scaled independently
4. **Consistency**: All embeddings use the same configured service
5. **Security**: API keys and endpoints stored securely in Redis
This implementation ensures that all embedding operations go through the external service configured in Redis, providing a clean separation between the vector database functionality and the embedding generation.
TODO EXTRA:
- secret for the embedding service API key

489
src/admin_meta.rs Normal file
View File

@@ -0,0 +1,489 @@
use std::path::PathBuf;
use std::sync::{Arc, OnceLock, Mutex, RwLock};
use std::collections::HashMap;
use crate::error::DBError;
use crate::options;
use crate::rpc::Permissions;
use crate::storage::Storage;
use crate::storage_sled::SledStorage;
use crate::storage_trait::StorageBackend;
// Key builders
fn k_admin_next_id() -> &'static str {
"admin:next_id"
}
fn k_admin_dbs() -> &'static str {
"admin:dbs"
}
fn k_meta_db(id: u64) -> String {
format!("meta:db:{}", id)
}
fn k_meta_db_keys(id: u64) -> String {
format!("meta:db:{}:keys", id)
}
fn k_meta_db_enc(id: u64) -> String {
format!("meta:db:{}:enc", id)
}
// Global cache of admin DB 0 handles per base_dir to avoid sled/reDB file-lock contention
// and to correctly isolate different test instances with distinct directories.
static ADMIN_STORAGES: OnceLock<RwLock<HashMap<String, Arc<dyn StorageBackend>>>> = OnceLock::new();
// Global registry for data DB storages to avoid double-open across process.
static DATA_STORAGES: OnceLock<RwLock<HashMap<u64, Arc<dyn StorageBackend>>>> = OnceLock::new();
static DATA_INIT_LOCK: Mutex<()> = Mutex::new(());
fn init_admin_storage(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<Arc<dyn StorageBackend>, DBError> {
let db_file = PathBuf::from(base_dir).join("0.db");
if let Some(parent_dir) = db_file.parent() {
std::fs::create_dir_all(parent_dir).map_err(|e| {
DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e))
})?;
}
let storage: Arc<dyn StorageBackend> = match backend {
options::BackendType::Redb => Arc::new(Storage::new(&db_file, true, Some(admin_secret))?),
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, true, Some(admin_secret))?),
options::BackendType::Tantivy => {
return Err(DBError("Admin DB 0 cannot use Tantivy backend".to_string()))
}
};
Ok(storage)
}
// Get or initialize a cached handle to admin DB 0 per base_dir (thread-safe, no double-open race)
pub fn open_admin_storage(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<Arc<dyn StorageBackend>, DBError> {
let map = ADMIN_STORAGES.get_or_init(|| RwLock::new(HashMap::new()));
// Fast path
if let Some(st) = map.read().unwrap().get(base_dir) {
return Ok(st.clone());
}
// Slow path with write lock
{
let mut w = map.write().unwrap();
if let Some(st) = w.get(base_dir) {
return Ok(st.clone());
}
// Detect existing 0.db backend by filesystem, if present.
let admin_path = PathBuf::from(base_dir).join("0.db");
let detected = if admin_path.exists() {
if admin_path.is_file() {
Some(options::BackendType::Redb)
} else if admin_path.is_dir() {
Some(options::BackendType::Sled)
} else {
None
}
} else {
None
};
let effective_backend = match detected {
Some(d) if d != backend => {
eprintln!(
"warning: Admin DB 0 at {} appears to be {:?}, but process default is {:?}. Using detected backend.",
admin_path.display(),
d,
backend
);
d
}
Some(d) => d,
None => backend, // First boot: use requested backend to initialize 0.db
};
let st = init_admin_storage(base_dir, effective_backend, admin_secret)?;
w.insert(base_dir.to_string(), st.clone());
Ok(st)
}
}
// Ensure admin structures exist in encrypted DB 0
pub fn ensure_bootstrap(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
// Initialize next id if missing
if !admin.exists(k_admin_next_id())? {
admin.set(k_admin_next_id().to_string(), "1".to_string())?;
}
// admin:dbs is a hash; it's fine if it doesn't exist (hlen -> 0)
Ok(())
}
// Get or initialize a shared handle to a data DB (> 0), avoiding double-open across subsystems
pub fn open_data_storage(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Arc<dyn StorageBackend>, DBError> {
if id == 0 {
return open_admin_storage(base_dir, backend, admin_secret);
}
// Validate existence in admin metadata
if !db_exists(base_dir, backend.clone(), admin_secret, id)? {
return Err(DBError(format!(
"Cannot open database instance {}, as that database instance does not exist.",
id
)));
}
let map = DATA_STORAGES.get_or_init(|| RwLock::new(HashMap::new()));
// Fast path
if let Some(st) = map.read().unwrap().get(&id) {
return Ok(st.clone());
}
// Slow path with init lock
let _guard = DATA_INIT_LOCK.lock().unwrap();
if let Some(st) = map.read().unwrap().get(&id) {
return Ok(st.clone());
}
// Resolve effective backend for this db id:
// 1) Try admin meta "backend" field
// 2) If missing, sniff filesystem (file => Redb, dir => Sled), then persist into admin meta
// 3) Fallback to requested 'backend' (startup default) if nothing else is known
let meta_backend = get_database_backend(base_dir, backend.clone(), admin_secret, id).ok().flatten();
let db_path = PathBuf::from(base_dir).join(format!("{}.db", id));
let sniffed_backend = if db_path.exists() {
if db_path.is_file() {
Some(options::BackendType::Redb)
} else if db_path.is_dir() {
Some(options::BackendType::Sled)
} else {
None
}
} else {
None
};
let effective_backend = meta_backend.clone().or(sniffed_backend).unwrap_or(backend.clone());
// If we had to sniff (i.e., meta missing), persist it for future robustness
if meta_backend.is_none() {
let _ = set_database_backend(base_dir, backend.clone(), admin_secret, id, effective_backend.clone());
}
// Warn if caller-provided backend differs from effective
if effective_backend != backend {
eprintln!(
"notice: Database {} backend resolved to {:?} (caller requested {:?}). Using resolved backend.",
id, effective_backend, backend
);
}
// Determine per-db encryption (from admin meta)
let enc = get_enc_key(base_dir, backend.clone(), admin_secret, id)?;
let should_encrypt = enc.is_some();
// Build database file path and ensure parent dir exists
let db_file = PathBuf::from(base_dir).join(format!("{}.db", id));
if let Some(parent_dir) = db_file.parent() {
std::fs::create_dir_all(parent_dir).map_err(|e| {
DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e))
})?;
}
// Open storage using the effective backend
let storage: Arc<dyn StorageBackend> = match effective_backend {
options::BackendType::Redb => Arc::new(Storage::new(&db_file, should_encrypt, enc.as_deref())?),
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, should_encrypt, enc.as_deref())?),
options::BackendType::Tantivy => {
return Err(DBError("Tantivy backend has no KV storage; use FT.* commands only".to_string()))
}
};
// Publish to registry
map.write().unwrap().insert(id, storage.clone());
Ok(storage)
}
// Allocate the next DB id and persist new pointer
pub fn allocate_next_id(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<u64, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let cur = admin
.get(k_admin_next_id())?
.unwrap_or_else(|| "1".to_string());
let id: u64 = cur.parse().unwrap_or(1);
let next = id.checked_add(1).ok_or_else(|| DBError("next_id overflow".into()))?;
admin.set(k_admin_next_id().to_string(), next.to_string())?;
// Register into admin:dbs set/hash
let _ = admin.hset(k_admin_dbs(), vec![(id.to_string(), "1".to_string())])?;
// Default meta for the new db: public true
let meta_key = k_meta_db(id);
let _ = admin.hset(&meta_key, vec![("public".to_string(), "true".to_string())])?;
Ok(id)
}
// Check existence of a db id in admin:dbs
pub fn db_exists(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<bool, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
Ok(admin.hexists(k_admin_dbs(), &id.to_string())?)
}
// Get per-db encryption key, if any
pub fn get_enc_key(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Option<String>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
admin.get(&k_meta_db_enc(id))
}
// Set per-db encryption key (called during create)
pub fn set_enc_key(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
key: &str,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
admin.set(k_meta_db_enc(id), key.to_string())
}
// Set database public flag
pub fn set_database_public(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
public: bool,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
let _ = admin.hset(&mk, vec![("public".to_string(), public.to_string())])?;
Ok(())
}
// Persist per-db backend type in admin metadata (module-scope)
pub fn set_database_backend(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
db_backend: options::BackendType,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
let val = match db_backend {
options::BackendType::Redb => "Redb",
options::BackendType::Sled => "Sled",
options::BackendType::Tantivy => "Tantivy",
};
let _ = admin.hset(&mk, vec![("backend".to_string(), val.to_string())])?;
Ok(())
}
pub fn get_database_backend(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Option<options::BackendType>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
match admin.hget(&mk, "backend")? {
Some(s) if s == "Redb" => Ok(Some(options::BackendType::Redb)),
Some(s) if s == "Sled" => Ok(Some(options::BackendType::Sled)),
Some(s) if s == "Tantivy" => Ok(Some(options::BackendType::Tantivy)),
_ => Ok(None),
}
}
// Set database name
pub fn set_database_name(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
name: &str,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
let _ = admin.hset(&mk, vec![("name".to_string(), name.to_string())])?;
Ok(())
}
// Get database name
pub fn get_database_name(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Option<String>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let mk = k_meta_db(id);
admin.hget(&mk, "name")
}
// Internal: load public flag; default to true when meta missing
fn load_public(
admin: &Arc<dyn StorageBackend>,
id: u64,
) -> Result<bool, DBError> {
let mk = k_meta_db(id);
match admin.hget(&mk, "public")? {
Some(v) => Ok(v == "true"),
None => Ok(true),
}
}
// Add access key for db (value format: "Read:ts" or "ReadWrite:ts")
pub fn add_access_key(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
key_plain: &str,
perms: Permissions,
) -> Result<(), DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let hash = crate::rpc::hash_key(key_plain);
let v = match perms {
Permissions::Read => format!("Read:{}", now_secs()),
Permissions::ReadWrite => format!("ReadWrite:{}", now_secs()),
};
let _ = admin.hset(&k_meta_db_keys(id), vec![(hash, v)])?;
Ok(())
}
// Delete access key by hash
pub fn delete_access_key(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
key_hash: &str,
) -> Result<bool, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let n = admin.hdel(&k_meta_db_keys(id), vec![key_hash.to_string()])?;
Ok(n > 0)
}
// List access keys, returning (hash, perms, created_at_secs)
pub fn list_access_keys(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
) -> Result<Vec<(String, Permissions, u64)>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let pairs = admin.hgetall(&k_meta_db_keys(id))?;
let mut out = Vec::new();
for (hash, val) in pairs {
let (perm, ts) = parse_perm_value(&val);
out.push((hash, perm, ts));
}
Ok(out)
}
// Verify access permission for db id with optional key
// Returns:
// - Ok(Some(Permissions)) when access is allowed
// - Ok(None) when not allowed or db missing (caller can distinguish by calling db_exists)
pub fn verify_access(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
id: u64,
key_opt: Option<&str>,
) -> Result<Option<Permissions>, DBError> {
// Admin DB 0: require exact admin_secret
if id == 0 {
if let Some(k) = key_opt {
if k == admin_secret {
return Ok(Some(Permissions::ReadWrite));
}
}
return Ok(None);
}
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
if !admin.hexists(k_admin_dbs(), &id.to_string())? {
return Ok(None);
}
// Public?
if load_public(&admin, id)? {
return Ok(Some(Permissions::ReadWrite));
}
// Private: require key and verify
if let Some(k) = key_opt {
let hash = crate::rpc::hash_key(k);
if let Some(v) = admin.hget(&k_meta_db_keys(id), &hash)? {
let (perm, _ts) = parse_perm_value(&v);
return Ok(Some(perm));
}
}
Ok(None)
}
// Enumerate all db ids
pub fn list_dbs(
base_dir: &str,
backend: options::BackendType,
admin_secret: &str,
) -> Result<Vec<u64>, DBError> {
let admin = open_admin_storage(base_dir, backend, admin_secret)?;
let ids = admin.hkeys(k_admin_dbs())?;
let mut out = Vec::new();
for s in ids {
if let Ok(v) = s.parse() {
out.push(v);
}
}
Ok(out)
}
// Helper: parse permission value "Read:ts" or "ReadWrite:ts"
fn parse_perm_value(v: &str) -> (Permissions, u64) {
let mut parts = v.split(':');
let p = parts.next().unwrap_or("Read");
let ts = parts
.next()
.and_then(|s| s.parse().ok())
.unwrap_or(0u64);
let perm = match p {
"ReadWrite" => Permissions::ReadWrite,
_ => Permissions::Read,
};
(perm, ts)
}
fn now_secs() -> u64 {
use std::time::{SystemTime, UNIX_EPOCH};
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs()
}

536
src/age.rs Normal file
View File

@@ -0,0 +1,536 @@
//! age.rs — AGE (rage) helpers + persistent key management for your mini-Redis.
//
// Features:
// - X25519 encryption/decryption (age style)
// - Ed25519 detached signatures + verification
// - Persistent named keys in DB (strings):
// age:key:{name} -> X25519 recipient (public encryption key, "age1...")
// age:privkey:{name} -> X25519 identity (secret encryption key, "AGE-SECRET-KEY-1...")
// age:signpub:{name} -> Ed25519 verify pubkey (public, used to verify signatures)
// age:signpriv:{name} -> Ed25519 signing secret key (private, used to sign)
// - Base64 wrapping for ciphertext/signature binary blobs.
use std::str::FromStr;
use secrecy::ExposeSecret;
use age::{Decryptor, Encryptor};
use age::x25519;
use ed25519_dalek::{Signature, Signer, Verifier, SigningKey, VerifyingKey};
use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
use std::collections::HashSet;
use std::convert::TryInto;
use crate::protocol::Protocol;
use crate::server::Server;
use crate::error::DBError;
// ---------- Internal helpers ----------
#[derive(Debug)]
pub enum AgeWireError {
ParseKey,
Crypto(String),
Utf8,
SignatureLen,
NotFound(&'static str), // which kind of key was missing
Storage(String),
}
impl AgeWireError {
fn to_protocol(self) -> Protocol {
match self {
AgeWireError::ParseKey => Protocol::err("ERR age: invalid key"),
AgeWireError::Crypto(e) => Protocol::err(&format!("ERR age: {e}")),
AgeWireError::Utf8 => Protocol::err("ERR age: invalid UTF-8 plaintext"),
AgeWireError::SignatureLen => Protocol::err("ERR age: bad signature length"),
AgeWireError::NotFound(w) => Protocol::err(&format!("ERR age: missing {w}")),
AgeWireError::Storage(e) => Protocol::err(&format!("ERR storage: {e}")),
}
}
}
fn parse_recipient(s: &str) -> Result<x25519::Recipient, AgeWireError> {
x25519::Recipient::from_str(s).map_err(|_| AgeWireError::ParseKey)
}
fn parse_identity(s: &str) -> Result<x25519::Identity, AgeWireError> {
x25519::Identity::from_str(s).map_err(|_| AgeWireError::ParseKey)
}
fn parse_ed25519_signing_key(s: &str) -> Result<SigningKey, AgeWireError> {
// Parse base64-encoded signing key
let bytes = B64.decode(s).map_err(|_| AgeWireError::ParseKey)?;
if bytes.len() != 32 {
return Err(AgeWireError::ParseKey);
}
let key_bytes: [u8; 32] = bytes.try_into().map_err(|_| AgeWireError::ParseKey)?;
Ok(SigningKey::from_bytes(&key_bytes))
}
fn parse_ed25519_verifying_key(s: &str) -> Result<VerifyingKey, AgeWireError> {
// Parse base64-encoded verifying key
let bytes = B64.decode(s).map_err(|_| AgeWireError::ParseKey)?;
if bytes.len() != 32 {
return Err(AgeWireError::ParseKey);
}
let key_bytes: [u8; 32] = bytes.try_into().map_err(|_| AgeWireError::ParseKey)?;
VerifyingKey::from_bytes(&key_bytes).map_err(|_| AgeWireError::ParseKey)
}
// ---------- Derivation + Raw X25519 (Ed25519 -> X25519) ----------
//
// We deterministically derive an X25519 keypair from an Ed25519 SigningKey.
// We persist the X25519 public/secret as base64-encoded 32-byte raw values
// (no "age1..."/"AGE-SECRET-KEY-1..." formatting). Name-based encrypt/decrypt
// uses these raw values directly via x25519-dalek + ChaCha20Poly1305.
use chacha20poly1305::{aead::{Aead, KeyInit}, ChaCha20Poly1305, Key, Nonce};
use sha2::{Digest, Sha256};
use x25519_dalek::{PublicKey as XPublicKey, StaticSecret as XStaticSecret};
fn derive_x25519_raw_from_ed25519(sk: &SigningKey) -> ([u8; 32], [u8; 32]) {
// X25519 secret scalar (clamped) from Ed25519 secret
let scalar: [u8; 32] = sk.to_scalar_bytes();
// Build X25519 secret/public using dalek
let xsec = XStaticSecret::from(scalar);
let xpub = XPublicKey::from(&xsec);
(xpub.to_bytes(), xsec.to_bytes())
}
fn derive_x25519_raw_b64_from_ed25519(sk: &SigningKey) -> (String, String) {
let (xpub, xsec) = derive_x25519_raw_from_ed25519(sk);
(B64.encode(xpub), B64.encode(xsec))
}
// Helper: detect whether a stored key looks like an age-formatted string
fn looks_like_age_format(s: &str) -> bool {
s.starts_with("age1") || s.starts_with("AGE-SECRET-KEY-1")
}
// Our container format for name-based raw X25519 encryption:
// bytes = "HDBX1" (5) || eph_pub(32) || nonce(12) || ciphertext(..)
// Entire blob is base64-encoded for transport.
const HDBX1_MAGIC: &[u8; 5] = b"HDBX1";
fn encrypt_b64_with_x25519_raw(recip_pub_b64: &str, msg: &str) -> Result<String, AgeWireError> {
use rand::RngCore;
use rand::rngs::OsRng;
// Parse recipient public key (raw 32 bytes, base64)
let recip_pub_bytes = B64.decode(recip_pub_b64).map_err(|_| AgeWireError::ParseKey)?;
if recip_pub_bytes.len() != 32 { return Err(AgeWireError::ParseKey); }
let recip_pub_arr: [u8; 32] = recip_pub_bytes.as_slice().try_into().map_err(|_| AgeWireError::ParseKey)?;
let recip_pub: XPublicKey = XPublicKey::from(recip_pub_arr);
// Generate ephemeral X25519 keypair
let mut eph_sec_bytes = [0u8; 32];
OsRng.fill_bytes(&mut eph_sec_bytes);
let eph_sec = XStaticSecret::from(eph_sec_bytes);
let eph_pub = XPublicKey::from(&eph_sec);
// ECDH
let shared = eph_sec.diffie_hellman(&recip_pub);
// Derive symmetric key via SHA-256 over context + shared + parties
let mut hasher = Sha256::default();
hasher.update(b"herodb-x25519-v1");
hasher.update(shared.as_bytes());
hasher.update(eph_pub.as_bytes());
hasher.update(recip_pub.as_bytes());
let key_bytes = hasher.finalize();
let key = Key::from_slice(&key_bytes[..32]);
// Nonce (12 bytes)
let mut nonce_bytes = [0u8; 12];
OsRng.fill_bytes(&mut nonce_bytes);
let nonce = Nonce::from_slice(&nonce_bytes);
// Encrypt
let cipher = ChaCha20Poly1305::new(key);
let ct = cipher.encrypt(nonce, msg.as_bytes())
.map_err(|e| AgeWireError::Crypto(format!("encrypt: {e}")))?;
// Assemble container
let mut out = Vec::with_capacity(5 + 32 + 12 + ct.len());
out.extend_from_slice(HDBX1_MAGIC);
out.extend_from_slice(eph_pub.as_bytes());
out.extend_from_slice(&nonce_bytes);
out.extend_from_slice(&ct);
Ok(B64.encode(out))
}
fn decrypt_b64_with_x25519_raw(identity_sec_b64: &str, ct_b64: &str) -> Result<String, AgeWireError> {
// Parse X25519 secret (raw 32 bytes, base64)
let sec_bytes = B64.decode(identity_sec_b64).map_err(|_| AgeWireError::ParseKey)?;
if sec_bytes.len() != 32 { return Err(AgeWireError::ParseKey); }
let sec_arr: [u8; 32] = sec_bytes.as_slice().try_into().map_err(|_| AgeWireError::ParseKey)?;
let xsec = XStaticSecret::from(sec_arr);
let xpub = XPublicKey::from(&xsec); // self public
// Decode container
let blob = B64.decode(ct_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
if blob.len() < 5 + 32 + 12 { return Err(AgeWireError::Crypto("ciphertext too short".to_string())); }
if &blob[..5] != HDBX1_MAGIC { return Err(AgeWireError::Crypto("bad header".to_string())); }
let eph_pub_arr: [u8; 32] = blob[5..5+32].try_into().map_err(|_| AgeWireError::Crypto("bad eph pub".to_string()))?;
let eph_pub = XPublicKey::from(eph_pub_arr);
let nonce_bytes: [u8; 12] = blob[5+32..5+32+12].try_into().unwrap();
let ct = &blob[5+32+12..];
// Recompute shared + key
let shared = xsec.diffie_hellman(&eph_pub);
let mut hasher = Sha256::default();
hasher.update(b"herodb-x25519-v1");
hasher.update(shared.as_bytes());
hasher.update(eph_pub.as_bytes());
hasher.update(xpub.as_bytes());
let key_bytes = hasher.finalize();
let key = Key::from_slice(&key_bytes[..32]);
// Decrypt
let cipher = ChaCha20Poly1305::new(key);
let nonce = Nonce::from_slice(&nonce_bytes);
let pt = cipher.decrypt(nonce, ct)
.map_err(|e| AgeWireError::Crypto(format!("decrypt: {e}")))?;
String::from_utf8(pt).map_err(|_| AgeWireError::Utf8)
}
// ---------- Stateless crypto helpers (string in/out) ----------
pub fn gen_enc_keypair() -> (String, String) {
let id = x25519::Identity::generate();
let pk = id.to_public();
(pk.to_string(), id.to_string().expose_secret().to_string()) // (recipient, identity)
}
pub fn gen_sign_keypair() -> (String, String) {
use rand::RngCore;
use rand::rngs::OsRng;
// Generate random 32 bytes for the signing key
let mut secret_bytes = [0u8; 32];
OsRng.fill_bytes(&mut secret_bytes);
let signing_key = SigningKey::from_bytes(&secret_bytes);
let verifying_key = signing_key.verifying_key();
// Encode as base64 for storage
let signing_key_b64 = B64.encode(signing_key.to_bytes());
let verifying_key_b64 = B64.encode(verifying_key.to_bytes());
(verifying_key_b64, signing_key_b64) // (verify_pub, signing_secret)
}
/// Encrypt `msg` for `recipient_str` (X25519). Returns base64(ciphertext).
pub fn encrypt_b64(recipient_str: &str, msg: &str) -> Result<String, AgeWireError> {
let recipient = parse_recipient(recipient_str)?;
let enc = Encryptor::with_recipients(vec![Box::new(recipient)])
.expect("failed to create encryptor"); // Handle Option<Encryptor>
let mut out = Vec::new();
{
use std::io::Write;
let mut w = enc.wrap_output(&mut out).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
w.write_all(msg.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
w.finish().map_err(|e| AgeWireError::Crypto(e.to_string()))?;
}
Ok(B64.encode(out))
}
/// Decrypt base64(ciphertext) with `identity_str`. Returns plaintext String.
pub fn decrypt_b64(identity_str: &str, ct_b64: &str) -> Result<String, AgeWireError> {
let id = parse_identity(identity_str)?;
let ct = B64.decode(ct_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
let dec = Decryptor::new(&ct[..]).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
// The decrypt method returns a Result<StreamReader, DecryptError>
let mut r = match dec {
Decryptor::Recipients(d) => d.decrypt(std::iter::once(&id as &dyn age::Identity))
.map_err(|e| AgeWireError::Crypto(e.to_string()))?,
Decryptor::Passphrase(_) => return Err(AgeWireError::Crypto("Expected recipients, got passphrase".to_string())),
};
let mut pt = Vec::new();
use std::io::Read;
r.read_to_end(&mut pt).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
String::from_utf8(pt).map_err(|_| AgeWireError::Utf8)
}
/// Sign bytes of `msg` (detached). Returns base64(signature bytes, 64 bytes).
pub fn sign_b64(signing_secret_str: &str, msg: &str) -> Result<String, AgeWireError> {
let signing_key = parse_ed25519_signing_key(signing_secret_str)?;
let sig = signing_key.sign(msg.as_bytes());
Ok(B64.encode(sig.to_bytes()))
}
/// Verify detached signature (base64) for `msg` with pubkey.
pub fn verify_b64(verify_pub_str: &str, msg: &str, sig_b64: &str) -> Result<bool, AgeWireError> {
let verifying_key = parse_ed25519_verifying_key(verify_pub_str)?;
let sig_bytes = B64.decode(sig_b64.as_bytes()).map_err(|e| AgeWireError::Crypto(e.to_string()))?;
if sig_bytes.len() != 64 {
return Err(AgeWireError::SignatureLen);
}
let sig = Signature::from_bytes(sig_bytes[..].try_into().unwrap());
Ok(verifying_key.verify(msg.as_bytes(), &sig).is_ok())
}
// ---------- Storage helpers ----------
fn sget(server: &Server, key: &str) -> Result<Option<String>, AgeWireError> {
let st = server.current_storage().map_err(|e| AgeWireError::Storage(e.0))?;
st.get(key).map_err(|e| AgeWireError::Storage(e.0))
}
fn sset(server: &Server, key: &str, val: &str) -> Result<(), AgeWireError> {
let st = server.current_storage().map_err(|e| AgeWireError::Storage(e.0))?;
st.set(key.to_string(), val.to_string()).map_err(|e| AgeWireError::Storage(e.0))
}
fn enc_pub_key_key(name: &str) -> String { format!("age:key:{name}") }
fn enc_priv_key_key(name: &str) -> String { format!("age:privkey:{name}") }
fn sign_pub_key_key(name: &str) -> String { format!("age:signpub:{name}") }
fn sign_priv_key_key(name: &str) -> String { format!("age:signpriv:{name}") }
// ---------- Command handlers (RESP Protocol) ----------
// Basic (stateless) ones kept for completeness
pub async fn cmd_age_genenc() -> Protocol {
let (recip, ident) = gen_enc_keypair();
Protocol::Array(vec![Protocol::BulkString(recip), Protocol::BulkString(ident)])
}
pub async fn cmd_age_gensign() -> Protocol {
let (verify, secret) = gen_sign_keypair();
Protocol::Array(vec![Protocol::BulkString(verify), Protocol::BulkString(secret)])
}
pub async fn cmd_age_encrypt(recipient: &str, message: &str) -> Protocol {
match encrypt_b64(recipient, message) {
Ok(b64) => Protocol::BulkString(b64),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_decrypt(identity: &str, ct_b64: &str) -> Protocol {
match decrypt_b64(identity, ct_b64) {
Ok(pt) => Protocol::BulkString(pt),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_sign(secret: &str, message: &str) -> Protocol {
match sign_b64(secret, message) {
Ok(b64sig) => Protocol::BulkString(b64sig),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_verify(verify_pub: &str, message: &str, sig_b64: &str) -> Protocol {
match verify_b64(verify_pub, message, sig_b64) {
Ok(true) => Protocol::SimpleString("1".to_string()),
Ok(false) => Protocol::SimpleString("0".to_string()),
Err(e) => e.to_protocol(),
}
}
// ---------- NEW: unified stateless generator (Ed25519 + derived X25519 raw) ----------
//
// Returns 4-tuple:
// [ verify_pub_b64 (32B), signpriv_b64 (32B), x25519_pub_b64 (32B), x25519_sec_b64 (32B) ]
// No persistence (stateless).
pub async fn cmd_age_genkey() -> Protocol {
use rand::RngCore;
use rand::rngs::OsRng;
let mut secret_bytes = [0u8; 32];
OsRng.fill_bytes(&mut secret_bytes);
let signing_key = SigningKey::from_bytes(&secret_bytes);
let verifying_key = signing_key.verifying_key();
let verify_b64 = B64.encode(verifying_key.to_bytes());
let sign_b64 = B64.encode(signing_key.to_bytes());
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&signing_key);
Protocol::Array(vec![
Protocol::BulkString(verify_b64),
Protocol::BulkString(sign_b64),
Protocol::BulkString(xpub_b64),
Protocol::BulkString(xsec_b64),
])
}
// ---------- NEW: Persistent, named-key commands ----------
pub async fn cmd_age_keygen(server: &Server, name: &str) -> Protocol {
use rand::RngCore;
use rand::rngs::OsRng;
// Generate Ed25519 keypair
let mut secret_bytes = [0u8; 32];
OsRng.fill_bytes(&mut secret_bytes);
let signing_key = SigningKey::from_bytes(&secret_bytes);
let verifying_key = signing_key.verifying_key();
// Encode Ed25519 as base64 (32 bytes)
let verify_b64 = B64.encode(verifying_key.to_bytes());
let sign_b64 = B64.encode(signing_key.to_bytes());
// Derive X25519 raw (32-byte) keys and encode as base64
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&signing_key);
// Decode to create age-formatted strings
let xpub_bytes = B64.decode(&xpub_b64).unwrap();
let xsec_bytes = B64.decode(&xsec_b64).unwrap();
let xpub_arr: [u8; 32] = xpub_bytes.as_slice().try_into().unwrap();
let xsec_arr: [u8; 32] = xsec_bytes.as_slice().try_into().unwrap();
let recip_str = format!("age1{}", B64.encode(xpub_arr));
let ident_str = format!("AGE-SECRET-KEY-1{}", B64.encode(xsec_arr));
// Persist Ed25519 and derived X25519 (key-managed mode)
if let Err(e) = sset(server, &sign_pub_key_key(name), &verify_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &sign_priv_key_key(name), &sign_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); }
// Return [recipient, identity] in age format
Protocol::Array(vec![
Protocol::BulkString(recip_str),
Protocol::BulkString(ident_str),
])
}
pub async fn cmd_age_signkeygen(server: &Server, name: &str) -> Protocol {
let (verify, secret) = gen_sign_keypair();
if let Err(e) = sset(server, &sign_pub_key_key(name), &verify) { return e.to_protocol(); }
if let Err(e) = sset(server, &sign_priv_key_key(name), &secret) { return e.to_protocol(); }
Protocol::Array(vec![Protocol::BulkString(verify), Protocol::BulkString(secret)])
}
pub async fn cmd_age_encrypt_name(server: &Server, name: &str, message: &str) -> Protocol {
// Load stored recipient (could be raw b64 32-byte or "age1..." from legacy)
let recip_or_b64 = match sget(server, &enc_pub_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => {
// Derive from stored Ed25519 if present, then persist
match sget(server, &sign_priv_key_key(name)) {
Ok(Some(sign_b64)) => {
let sk = match parse_ed25519_signing_key(&sign_b64) {
Ok(k) => k,
Err(e) => return e.to_protocol(),
};
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&sk);
if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); }
xpub_b64
}
Ok(None) => return AgeWireError::NotFound("recipient (age:key:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
}
}
Err(e) => return e.to_protocol(),
};
if looks_like_age_format(&recip_or_b64) {
match encrypt_b64(&recip_or_b64, message) {
Ok(ct) => Protocol::BulkString(ct),
Err(e) => e.to_protocol(),
}
} else {
match encrypt_b64_with_x25519_raw(&recip_or_b64, message) {
Ok(ct) => Protocol::BulkString(ct),
Err(e) => e.to_protocol(),
}
}
}
pub async fn cmd_age_decrypt_name(server: &Server, name: &str, ct_b64: &str) -> Protocol {
// Load stored identity (could be raw b64 32-byte or "AGE-SECRET-KEY-1..." from legacy)
let ident_or_b64 = match sget(server, &enc_priv_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => {
// Derive from stored Ed25519 if present, then persist
match sget(server, &sign_priv_key_key(name)) {
Ok(Some(sign_b64)) => {
let sk = match parse_ed25519_signing_key(&sign_b64) {
Ok(k) => k,
Err(e) => return e.to_protocol(),
};
let (xpub_b64, xsec_b64) = derive_x25519_raw_b64_from_ed25519(&sk);
if let Err(e) = sset(server, &enc_pub_key_key(name), &xpub_b64) { return e.to_protocol(); }
if let Err(e) = sset(server, &enc_priv_key_key(name), &xsec_b64) { return e.to_protocol(); }
xsec_b64
}
Ok(None) => return AgeWireError::NotFound("identity (age:privkey:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
}
}
Err(e) => return e.to_protocol(),
};
if looks_like_age_format(&ident_or_b64) {
match decrypt_b64(&ident_or_b64, ct_b64) {
Ok(pt) => Protocol::BulkString(pt),
Err(e) => e.to_protocol(),
}
} else {
match decrypt_b64_with_x25519_raw(&ident_or_b64, ct_b64) {
Ok(pt) => Protocol::BulkString(pt),
Err(e) => e.to_protocol(),
}
}
}
pub async fn cmd_age_sign_name(server: &Server, name: &str, message: &str) -> Protocol {
let sec = match sget(server, &sign_priv_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => return AgeWireError::NotFound("signing secret (age:signpriv:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
};
match sign_b64(&sec, message) {
Ok(sig) => Protocol::BulkString(sig),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_verify_name(server: &Server, name: &str, message: &str, sig_b64: &str) -> Protocol {
let pubk = match sget(server, &sign_pub_key_key(name)) {
Ok(Some(v)) => v,
Ok(None) => return AgeWireError::NotFound("verify pubkey (age:signpub:{name})").to_protocol(),
Err(e) => return e.to_protocol(),
};
match verify_b64(&pubk, message, sig_b64) {
Ok(true) => Protocol::SimpleString("1".to_string()),
Ok(false) => Protocol::SimpleString("0".to_string()),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_age_list(server: &Server) -> Protocol {
// Return a flat, deduplicated, sorted list of managed key names (no labels)
let st = match server.current_storage() { Ok(s) => s, Err(e) => return Protocol::err(&e.0) };
let pull = |pat: &str, prefix: &str| -> Result<Vec<String>, DBError> {
let keys = st.keys(pat)?;
let mut names: Vec<String> = keys
.into_iter()
.filter_map(|k| k.strip_prefix(prefix).map(|x| x.to_string()))
.collect();
names.sort();
Ok(names)
};
let encpub = match pull("age:key:*", "age:key:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let encpriv = match pull("age:privkey:*", "age:privkey:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let signpub = match pull("age:signpub:*", "age:signpub:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let signpriv = match pull("age:signpriv:*", "age:signpriv:") { Ok(v) => v, Err(e)=> return Protocol::err(&e.0) };
let mut set: HashSet<String> = HashSet::new();
for n in encpub.into_iter().chain(encpriv).chain(signpub).chain(signpriv) {
set.insert(n);
}
let mut names: Vec<String> = set.into_iter().collect();
names.sort();
Protocol::Array(names.into_iter().map(Protocol::BulkString).collect())
}

View File

@@ -1,5 +1,4 @@
use crate::{error::DBError, protocol::Protocol, server::Server};
use serde::Serialize;
use tokio::time::{timeout, Duration};
use futures::future::select_all;
@@ -7,7 +6,7 @@ use futures::future::select_all;
pub enum Cmd {
Ping,
Echo(String),
Select(u64), // Changed from u16 to u64
Select(u64, Option<String>), // db_index, optional_key
Get(String),
Set(String, String),
SetPx(String, String, u128),
@@ -72,12 +71,13 @@ pub enum Cmd {
// AGE (rage) commands — stateless
AgeGenEnc,
AgeGenSign,
AgeGenKey, // unified stateless: returns [verify_b64, signpriv_b64, x25519_pub_b64, x25519_sec_b64]
AgeEncrypt(String, String), // recipient, message
AgeDecrypt(String, String), // identity, ciphertext_b64
AgeSign(String, String), // signing_secret, message
AgeVerify(String, String, String), // verify_pub, message, signature_b64
// NEW: persistent named-key commands
// Persistent named-key commands
AgeKeygen(String), // name
AgeSignKeygen(String), // name
AgeEncryptName(String, String), // name, message
@@ -85,6 +85,47 @@ pub enum Cmd {
AgeSignName(String, String), // name, message
AgeVerifyName(String, String, String), // name, message, signature_b64
AgeList,
// SYM (symmetric) commands — stateless
// Raw 32-byte key provided as base64; ciphertext returned as base64
SymKeygen,
SymEncrypt(String, String), // key_b64, message
SymDecrypt(String, String), // key_b64, ciphertext_b64
// Full-text search commands with schema support
FtCreate {
index_name: String,
schema: Vec<(String, String, Vec<String>)>, // (field_name, field_type, options)
},
FtAdd {
index_name: String,
doc_id: String,
score: f64,
fields: std::collections::HashMap<String, String>,
},
FtSearch {
index_name: String,
query: String,
filters: Vec<(String, String)>, // field, value pairs
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
},
FtDel(String, String), // index_name, doc_id
FtInfo(String), // index_name
FtDrop(String), // index_name
FtAlter {
index_name: String,
field_name: String,
field_type: String,
options: Vec<String>,
},
FtAggregate {
index_name: String,
query: String,
group_by: Vec<String>,
reducers: Vec<String>,
}
}
impl Cmd {
@@ -99,11 +140,18 @@ impl Cmd {
Ok((
match cmd[0].to_lowercase().as_str() {
"select" => {
if cmd.len() != 2 {
if cmd.len() < 2 || cmd.len() > 4 {
return Err(DBError("wrong number of arguments for SELECT".to_string()));
}
let idx = cmd[1].parse::<u64>().map_err(|_| DBError("ERR DB index is not an integer".to_string()))?;
Cmd::Select(idx)
let key = if cmd.len() == 4 && cmd[2].to_lowercase() == "key" {
Some(cmd[3].clone())
} else if cmd.len() == 2 {
None
} else {
return Err(DBError("ERR syntax error".to_string()));
};
Cmd::Select(idx, key)
}
"echo" => Cmd::Echo(cmd[1].clone()),
"ping" => Cmd::Ping,
@@ -590,6 +638,8 @@ impl Cmd {
Cmd::AgeGenEnc }
"gensign" => { if cmd.len() != 2 { return Err(DBError("AGE GENSIGN takes no args".to_string())); }
Cmd::AgeGenSign }
"genkey" => { if cmd.len() != 2 { return Err(DBError("AGE GENKEY takes no args".to_string())); }
Cmd::AgeGenKey }
"encrypt" => { if cmd.len() != 4 { return Err(DBError("AGE ENCRYPT <recipient> <message>".to_string())); }
Cmd::AgeEncrypt(cmd[2].clone(), cmd[3].clone()) }
"decrypt" => { if cmd.len() != 4 { return Err(DBError("AGE DECRYPT <identity> <ciphertext_b64>".to_string())); }
@@ -617,6 +667,154 @@ impl Cmd {
_ => return Err(DBError(format!("unsupported AGE subcommand {:?}", cmd))),
}
}
"sym" => {
if cmd.len() < 2 {
return Err(DBError("wrong number of arguments for SYM".to_string()));
}
match cmd[1].to_lowercase().as_str() {
"keygen" => { if cmd.len() != 2 { return Err(DBError("SYM KEYGEN takes no args".to_string())); }
Cmd::SymKeygen }
"encrypt" => { if cmd.len() != 4 { return Err(DBError("SYM ENCRYPT <key_b64> <message>".to_string())); }
Cmd::SymEncrypt(cmd[2].clone(), cmd[3].clone()) }
"decrypt" => { if cmd.len() != 4 { return Err(DBError("SYM DECRYPT <key_b64> <ciphertext_b64>".to_string())); }
Cmd::SymDecrypt(cmd[2].clone(), cmd[3].clone()) }
_ => return Err(DBError(format!("unsupported SYM subcommand {:?}", cmd))),
}
}
"ft.create" => {
if cmd.len() < 4 || cmd[2].to_uppercase() != "SCHEMA" {
return Err(DBError("ERR FT.CREATE requires: indexname SCHEMA field1 type1 [options] ...".to_string()));
}
let index_name = cmd[1].clone();
let mut schema = Vec::new();
let mut i = 3;
while i < cmd.len() {
if i + 1 >= cmd.len() {
return Err(DBError("ERR incomplete field definition".to_string()));
}
let field_name = cmd[i].clone();
let field_type = cmd[i + 1].to_uppercase();
let mut options = Vec::new();
i += 2;
// Parse field options until we hit another field name or end
while i < cmd.len()
&& ["WEIGHT","SORTABLE","NOINDEX","SEPARATOR","CASESENSITIVE"]
.contains(&cmd[i].to_uppercase().as_str())
{
options.push(cmd[i].to_uppercase());
i += 1;
// If this option takes a value, consume it too
if i > 0 && ["SEPARATOR","WEIGHT"].contains(&cmd[i - 1].to_uppercase().as_str()) && i < cmd.len() {
options.push(cmd[i].clone());
i += 1;
}
}
schema.push((field_name, field_type, options));
}
Cmd::FtCreate { index_name, schema }
}
"ft.add" => {
if cmd.len() < 5 {
return Err(DBError("ERR FT.ADD requires: index_name doc_id score field value ...".to_string()));
}
let index_name = cmd[1].clone();
let doc_id = cmd[2].clone();
let score = cmd[3].parse::<f64>().map_err(|_| DBError("ERR score must be a number".to_string()))?;
let mut fields = std::collections::HashMap::new();
let mut i = 4;
while i + 1 < cmd.len() {
fields.insert(cmd[i].clone(), cmd[i + 1].clone());
i += 2;
}
Cmd::FtAdd { index_name, doc_id, score, fields }
}
"ft.search" => {
if cmd.len() < 3 {
return Err(DBError("ERR FT.SEARCH requires: index_name query [options]".to_string()));
}
let index_name = cmd[1].clone();
let query = cmd[2].clone();
let mut filters = Vec::new();
let mut limit = None;
let mut offset = None;
let mut return_fields = None;
let mut i = 3;
while i < cmd.len() {
match cmd[i].to_uppercase().as_str() {
"FILTER" => {
if i + 2 >= cmd.len() {
return Err(DBError("ERR FILTER requires field and value".to_string()));
}
filters.push((cmd[i + 1].clone(), cmd[i + 2].clone()));
i += 3;
}
"LIMIT" => {
if i + 2 >= cmd.len() {
return Err(DBError("ERR LIMIT requires offset and num".to_string()));
}
offset = Some(cmd[i + 1].parse().unwrap_or(0));
limit = Some(cmd[i + 2].parse().unwrap_or(10));
i += 3;
}
"RETURN" => {
if i + 1 >= cmd.len() {
return Err(DBError("ERR RETURN requires field count".to_string()));
}
let count: usize = cmd[i + 1].parse().unwrap_or(0);
i += 2;
let mut fields = Vec::new();
for _ in 0..count {
if i < cmd.len() {
fields.push(cmd[i].clone());
i += 1;
}
}
return_fields = Some(fields);
}
_ => i += 1,
}
}
Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields }
}
"ft.del" => {
if cmd.len() != 3 {
return Err(DBError("ERR FT.DEL requires: index_name doc_id".to_string()));
}
Cmd::FtDel(cmd[1].clone(), cmd[2].clone())
}
"ft.info" => {
if cmd.len() != 2 {
return Err(DBError("ERR FT.INFO requires: index_name".to_string()));
}
Cmd::FtInfo(cmd[1].clone())
}
"ft.drop" => {
if cmd.len() != 2 {
return Err(DBError("ERR FT.DROP requires: index_name".to_string()));
}
Cmd::FtDrop(cmd[1].clone())
}
"ft.alter" => {
if cmd.len() < 5 {
return Err(DBError("ERR FT.ALTER requires: index_name field_name field_type [options]".to_string()));
}
let index_name = cmd[1].clone();
let field_name = cmd[2].clone();
let field_type = cmd[3].clone();
let options = if cmd.len() > 4 { cmd[4..].to_vec() } else { vec![] };
Cmd::FtAlter { index_name, field_name, field_type, options }
}
"ft.aggregate" => {
if cmd.len() < 3 {
return Err(DBError("ERR FT.AGGREGATE requires: index_name query [options]".to_string()));
}
let index_name = cmd[1].clone();
let query = cmd[2].clone();
// Minimal parse for now
let group_by = Vec::new();
let reducers = Vec::new();
Cmd::FtAggregate { index_name, query, group_by, reducers }
}
_ => Cmd::Unknow(cmd[0].clone()),
},
protocol,
@@ -642,8 +840,61 @@ impl Cmd {
return Ok(Protocol::SimpleString("QUEUED".to_string()));
}
// Backend gating for Tantivy-only DBs: allow only FT.* and basic control/info commands
// Determine per-selected-db backend via admin meta (not process default).
let is_tantivy_backend = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if is_tantivy_backend {
match &self {
Cmd::Select(..)
| Cmd::Quit
| Cmd::Client(..)
| Cmd::ClientSetName(..)
| Cmd::ClientGetName
| Cmd::Command(..)
| Cmd::Info(..)
| Cmd::FtCreate { .. }
| Cmd::FtAdd { .. }
| Cmd::FtSearch { .. }
| Cmd::FtDel(..)
| Cmd::FtInfo(..)
| Cmd::FtDrop(..)
| Cmd::FtAlter { .. }
| Cmd::FtAggregate { .. } => {}
_ => {
return Ok(Protocol::err("ERR backend is Tantivy; only FT.* commands are allowed"));
}
}
}
// If selected DB is not Tantivy, forbid all FT.* commands here.
if !is_tantivy_backend {
match &self {
Cmd::FtCreate { .. }
| Cmd::FtAdd { .. }
| Cmd::FtSearch { .. }
| Cmd::FtDel(..)
| Cmd::FtInfo(..)
| Cmd::FtDrop(..)
| Cmd::FtAlter { .. }
| Cmd::FtAggregate { .. } => {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
_ => {}
}
}
match self {
Cmd::Select(db) => select_cmd(server, db).await,
Cmd::Select(db, key) => select_cmd(server, db, key).await,
Cmd::Ping => Ok(Protocol::SimpleString("PONG".to_string())),
Cmd::Echo(s) => Ok(Protocol::BulkString(s)),
Cmd::Get(k) => get_cmd(server, &k).await,
@@ -718,6 +969,7 @@ impl Cmd {
// AGE (rage): stateless
Cmd::AgeGenEnc => Ok(crate::age::cmd_age_genenc().await),
Cmd::AgeGenSign => Ok(crate::age::cmd_age_gensign().await),
Cmd::AgeGenKey => Ok(crate::age::cmd_age_genkey().await),
Cmd::AgeEncrypt(recipient, message) => Ok(crate::age::cmd_age_encrypt(&recipient, &message).await),
Cmd::AgeDecrypt(identity, ct_b64) => Ok(crate::age::cmd_age_decrypt(&identity, &ct_b64).await),
Cmd::AgeSign(secret, message) => Ok(crate::age::cmd_age_sign(&secret, &message).await),
@@ -731,13 +983,52 @@ impl Cmd {
Cmd::AgeSignName(name, message) => Ok(crate::age::cmd_age_sign_name(server, &name, &message).await),
Cmd::AgeVerifyName(name, message, sig_b64) => Ok(crate::age::cmd_age_verify_name(server, &name, &message, &sig_b64).await),
Cmd::AgeList => Ok(crate::age::cmd_age_list(server).await),
// SYM (symmetric): stateless (Phase 1)
Cmd::SymKeygen => Ok(crate::sym::cmd_sym_keygen().await),
Cmd::SymEncrypt(key_b64, message) => Ok(crate::sym::cmd_sym_encrypt(&key_b64, &message).await),
Cmd::SymDecrypt(key_b64, ct_b64) => Ok(crate::sym::cmd_sym_decrypt(&key_b64, &ct_b64).await),
// Full-text search commands
Cmd::FtCreate { index_name, schema } => {
crate::search_cmd::ft_create_cmd(server, index_name, schema).await
}
Cmd::FtAdd { index_name, doc_id, score, fields } => {
crate::search_cmd::ft_add_cmd(server, index_name, doc_id, score, fields).await
}
Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields } => {
crate::search_cmd::ft_search_cmd(server, index_name, query, filters, limit, offset, return_fields).await
}
Cmd::FtDel(index_name, doc_id) => {
crate::search_cmd::ft_del_cmd(server, index_name, doc_id).await
}
Cmd::FtInfo(index_name) => {
crate::search_cmd::ft_info_cmd(server, index_name).await
}
Cmd::FtDrop(index_name) => {
crate::search_cmd::ft_drop_cmd(server, index_name).await
}
Cmd::FtAlter { .. } => {
Ok(Protocol::err("FT.ALTER not implemented yet"))
}
Cmd::FtAggregate { .. } => {
Ok(Protocol::err("FT.AGGREGATE not implemented yet"))
}
Cmd::Unknow(s) => Ok(Protocol::err(&format!("ERR unknown command `{}`", s))),
}
}
pub fn to_protocol(self) -> Protocol {
match self {
Cmd::Select(db) => Protocol::Array(vec![Protocol::BulkString("select".to_string()), Protocol::BulkString(db.to_string())]),
Cmd::Select(db, key) => {
let mut arr = vec![Protocol::BulkString("select".to_string()), Protocol::BulkString(db.to_string())];
if let Some(k) = key {
arr.push(Protocol::BulkString("key".to_string()));
arr.push(Protocol::BulkString(k));
}
Protocol::Array(arr)
}
Cmd::Ping => Protocol::Array(vec![Protocol::BulkString("ping".to_string())]),
Cmd::Echo(s) => Protocol::Array(vec![Protocol::BulkString("echo".to_string()), Protocol::BulkString(s)]),
Cmd::Get(k) => Protocol::Array(vec![Protocol::BulkString("get".to_string()), Protocol::BulkString(k)]),
@@ -754,12 +1045,83 @@ async fn flushdb_cmd(server: &mut Server) -> Result<Protocol, DBError> {
}
}
async fn select_cmd(server: &mut Server, db: u64) -> Result<Protocol, DBError> {
// Test if we can access the database (this will create it if needed)
async fn select_cmd(server: &mut Server, db: u64, key: Option<String>) -> Result<Protocol, DBError> {
// Authorization and existence checks via admin DB 0
// DB 0: require KEY admin-secret
if db == 0 {
match key {
Some(k) if k == server.option.admin_secret => {
server.selected_db = 0;
server.current_permissions = Some(crate::rpc::Permissions::ReadWrite);
// Will create encrypted 0.db if missing
match server.current_storage() {
Ok(_) => return Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => return Ok(Protocol::err(&e.0)),
}
}
_ => {
return Ok(Protocol::err("ERR invalid access key"));
}
}
}
// DB > 0: must exist in admin:dbs
let exists = match crate::admin_meta::db_exists(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
db,
) {
Ok(b) => b,
Err(e) => return Ok(Protocol::err(&e.0)),
};
if !exists {
return Ok(Protocol::err(&format!(
"Cannot open database instance {}, as that database instance does not exist.",
db
)));
}
// Verify permissions (public => RW; private => use key)
let perms_opt = match crate::admin_meta::verify_access(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
db,
key.as_deref(),
) {
Ok(p) => p,
Err(e) => return Ok(Protocol::err(&e.0)),
};
let perms = match perms_opt {
Some(p) => p,
None => return Ok(Protocol::err("ERR invalid access key")),
};
// Set selected database and permissions, then open storage (skip for Tantivy backend)
server.selected_db = db;
match server.current_storage() {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&e.0)),
server.current_permissions = Some(perms);
// Resolve effective backend for this db_id from admin meta
let eff_backend = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
db,
)
.ok()
.flatten();
if matches!(eff_backend, Some(crate::options::BackendType::Tantivy)) {
// Tantivy DBs have no KV storage; allow SELECT to succeed
Ok(Protocol::SimpleString("OK".to_string()))
} else {
match server.current_storage() {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&e.0)),
}
}
}
@@ -1004,6 +1366,9 @@ async fn brpop_cmd(server: &Server, keys: &[String], timeout_secs: f64) -> Resul
}
async fn lpush_cmd(server: &Server, key: &str, elements: &[String]) -> Result<Protocol, DBError> {
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
match server.current_storage()?.lpush(key, elements.to_vec()) {
Ok(len) => {
// Attempt to deliver to any blocked BLPOP waiters
@@ -1093,26 +1458,43 @@ async fn dbsize_cmd(server: &Server) -> Result<Protocol, DBError> {
}
}
#[derive(Serialize)]
struct ServerInfo {
redis_version: String,
encrypted: bool,
selected_db: u64,
}
async fn info_cmd(server: &Server, section: &Option<String>) -> Result<Protocol, DBError> {
let info = ServerInfo {
redis_version: "7.0.0".to_string(),
encrypted: server.current_storage()?.is_encrypted(),
selected_db: server.selected_db,
// For Tantivy backend, there is no KV storage; synthesize minimal info.
// Determine effective backend for the currently selected db.
let is_tantivy_db = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
let storage_info: Vec<(String, String)> = if is_tantivy_db {
vec![
("db_size".to_string(), "0".to_string()),
("is_encrypted".to_string(), "false".to_string()),
]
} else {
server.current_storage()?.info()?
};
let mut info_map: std::collections::HashMap<String, String> = storage_info.into_iter().collect();
info_map.insert("redis_version".to_string(), "7.0.0".to_string());
info_map.insert("selected_db".to_string(), server.selected_db.to_string());
info_map.insert("backend".to_string(), format!("{:?}", server.option.backend));
let mut info_string = String::new();
info_string.push_str(&format!("# Server\n"));
info_string.push_str(&format!("redis_version:{}\n", info.redis_version));
info_string.push_str(&format!("encrypted:{}\n", if info.encrypted { 1 } else { 0 }));
info_string.push_str(&format!("# Keyspace\n"));
info_string.push_str(&format!("db{}:keys=0,expires=0,avg_ttl=0\n", info.selected_db));
info_string.push_str("# Server\n");
info_string.push_str(&format!("redis_version:{}\n", info_map.get("redis_version").unwrap()));
info_string.push_str(&format!("backend:{}\n", info_map.get("backend").unwrap()));
info_string.push_str(&format!("encrypted:{}\n", info_map.get("is_encrypted").unwrap()));
info_string.push_str("# Keyspace\n");
info_string.push_str(&format!("db{}:keys={},expires=0,avg_ttl=0\n", info_map.get("selected_db").unwrap(), info_map.get("db_size").unwrap()));
match section {
Some(s) => {
@@ -1138,8 +1520,16 @@ async fn type_cmd(server: &Server, k: &String) -> Result<Protocol, DBError> {
}
async fn del_cmd(server: &Server, k: &str) -> Result<Protocol, DBError> {
server.current_storage()?.del(k.to_string())?;
Ok(Protocol::SimpleString("1".to_string()))
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
let storage = server.current_storage()?;
if storage.exists(k)? {
storage.del(k.to_string())?;
Ok(Protocol::SimpleString("1".to_string()))
} else {
Ok(Protocol::SimpleString("0".to_string()))
}
}
async fn set_ex_cmd(
@@ -1163,6 +1553,9 @@ async fn set_px_cmd(
}
async fn set_cmd(server: &Server, k: &str, v: &str) -> Result<Protocol, DBError> {
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
server.current_storage()?.set(k.to_string(), v.to_string())?;
Ok(Protocol::SimpleString("OK".to_string()))
}
@@ -1247,6 +1640,9 @@ async fn mset_cmd(server: &Server, pairs: &[(String, String)]) -> Result<Protoco
// DEL with multiple keys: return count of keys actually deleted
async fn del_multi_cmd(server: &Server, keys: &[String]) -> Result<Protocol, DBError> {
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
let storage = server.current_storage()?;
let mut deleted = 0i64;
for k in keys {
@@ -1277,6 +1673,9 @@ async fn get_cmd(server: &Server, k: &str) -> Result<Protocol, DBError> {
// Hash command implementations
async fn hset_cmd(server: &Server, key: &str, pairs: &[(String, String)]) -> Result<Protocol, DBError> {
if !server.has_write_permission() {
return Ok(Protocol::err("ERR write permission denied"));
}
let new_fields = server.current_storage()?.hset(key, pairs.to_vec())?;
Ok(Protocol::SimpleString(new_fields.to_string()))
}

View File

@@ -23,6 +23,7 @@ impl From<CryptoError> for crate::error::DBError {
}
/// Super-simple factory: new(secret) + encrypt(bytes) + decrypt(bytes)
#[derive(Clone)]
pub struct CryptoFactory {
key: chacha20poly1305::Key,
}

16
src/lib.rs Normal file
View File

@@ -0,0 +1,16 @@
pub mod age;
pub mod sym;
pub mod cmd;
pub mod crypto;
pub mod error;
pub mod options;
pub mod protocol;
pub mod rpc;
pub mod rpc_server;
pub mod server;
pub mod storage;
pub mod storage_trait;
pub mod storage_sled;
pub mod admin_meta;
pub mod tantivy_search;
pub mod search_cmd;

140
src/main.rs Normal file
View File

@@ -0,0 +1,140 @@
// #![allow(unused_imports)]
use tokio::net::TcpListener;
use herodb::server;
use herodb::rpc_server;
use clap::Parser;
/// Simple program to greet a person
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
/// The directory of Redis DB file
#[arg(long)]
dir: String,
/// The port of the Redis server, default is 6379 if not specified
#[arg(long)]
port: Option<u16>,
/// Enable debug mode
#[arg(long)]
debug: bool,
/// Master encryption key for encrypted databases (deprecated; ignored for data DBs)
#[arg(long)]
encryption_key: Option<String>,
/// Encrypt the database (deprecated; ignored for data DBs)
#[arg(long)]
encrypt: bool,
/// Enable RPC management server
#[arg(long)]
enable_rpc: bool,
/// RPC server port (default: 8080)
#[arg(long, default_value = "8080")]
rpc_port: u16,
/// Use the sled backend
#[arg(long)]
sled: bool,
/// Admin secret used to encrypt DB 0 and authorize admin access (required)
#[arg(long)]
admin_secret: String,
}
#[tokio::main]
async fn main() {
// parse args
let args = Args::parse();
// bind port
let port = args.port.unwrap_or(6379);
println!("will listen on port: {}", port);
let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
.await
.unwrap();
// deprecation warnings for legacy flags
if args.encrypt || args.encryption_key.is_some() {
eprintln!("warning: --encrypt and --encryption-key are deprecated and ignored for data DBs. Admin DB 0 is always encrypted with --admin-secret.");
}
// basic validation for admin secret
if args.admin_secret.trim().is_empty() {
eprintln!("error: --admin-secret must not be empty");
std::process::exit(2);
}
// new DB option
let option = herodb::options::DBOption {
dir: args.dir.clone(),
port,
debug: args.debug,
encryption_key: args.encryption_key,
encrypt: args.encrypt,
backend: if args.sled {
herodb::options::BackendType::Sled
} else {
herodb::options::BackendType::Redb
},
admin_secret: args.admin_secret.clone(),
};
let backend = option.backend.clone();
// Bootstrap admin DB 0 before opening any server storage
if let Err(e) = herodb::admin_meta::ensure_bootstrap(&args.dir, backend.clone(), &args.admin_secret) {
eprintln!("Failed to bootstrap admin DB 0: {}", e.0);
std::process::exit(2);
}
// new server
let server = server::Server::new(option).await;
// Add a small delay to ensure the port is ready
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
// Start RPC server if enabled
let _rpc_handle = if args.enable_rpc {
let rpc_addr = format!("127.0.0.1:{}", args.rpc_port).parse().unwrap();
let base_dir = args.dir.clone();
match rpc_server::start_rpc_server(rpc_addr, base_dir, backend, args.admin_secret.clone()).await {
Ok(handle) => {
println!("RPC management server started on port {}", args.rpc_port);
Some(handle)
}
Err(e) => {
eprintln!("Failed to start RPC server: {}", e);
None
}
}
} else {
None
};
// accept new connections
loop {
let stream = listener.accept().await;
match stream {
Ok((stream, _)) => {
println!("accepted new connection");
let mut sc = server.clone();
tokio::spawn(async move {
if let Err(e) = sc.handle(stream).await {
println!("error: {:?}, will close the connection. Bye", e);
}
});
}
Err(e) => {
println!("error: {}", e);
}
}
}
}

20
src/options.rs Normal file
View File

@@ -0,0 +1,20 @@
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BackendType {
Redb,
Sled,
Tantivy, // Full-text search backend (no KV storage)
}
#[derive(Debug, Clone)]
pub struct DBOption {
pub dir: String,
pub port: u16,
pub debug: bool,
// Deprecated for data DBs; retained for backward-compat on CLI parsing
pub encrypt: bool,
// Deprecated for data DBs; retained for backward-compat on CLI parsing
pub encryption_key: Option<String>,
pub backend: BackendType,
// New: required admin secret, used to encrypt DB 0 and authorize admin operations
pub admin_secret: String,
}

646
src/rpc.rs Normal file
View File

@@ -0,0 +1,646 @@
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use jsonrpsee::{core::RpcResult, proc_macros::rpc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use crate::server::Server;
use crate::options::DBOption;
use crate::admin_meta;
/// Database backend types
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum BackendType {
Redb,
Sled,
Tantivy, // Full-text search backend (no KV storage)
// Future: InMemory, Custom(String)
}
/// Database configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseConfig {
pub name: Option<String>,
pub storage_path: Option<String>,
pub max_size: Option<u64>,
pub redis_version: Option<String>,
}
/// Database information returned by metadata queries
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseInfo {
pub id: u64,
pub name: Option<String>,
pub backend: BackendType,
pub encrypted: bool,
pub redis_version: Option<String>,
pub storage_path: Option<String>,
pub size_on_disk: Option<u64>,
pub key_count: Option<u64>,
pub created_at: u64,
pub last_access: Option<u64>,
}
/// Access permissions for database keys
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum Permissions {
Read,
ReadWrite,
}
/// Access key information returned by RPC
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccessKeyInfo {
pub hash: String,
pub permissions: Permissions,
pub created_at: u64,
}
/// Hash a plaintext key using SHA-256
pub fn hash_key(key: &str) -> String {
let mut hasher = Sha256::new();
hasher.update(key.as_bytes());
format!("{:x}", hasher.finalize())
}
/// RPC trait for HeroDB management
#[rpc(server, client, namespace = "herodb")]
pub trait Rpc {
/// Create a new database with specified configuration
#[method(name = "createDatabase")]
async fn create_database(
&self,
backend: BackendType,
config: DatabaseConfig,
encryption_key: Option<String>,
) -> RpcResult<u64>;
/// Set encryption for an existing database (write-only key)
#[method(name = "setEncryption")]
async fn set_encryption(&self, db_id: u64, encryption_key: String) -> RpcResult<bool>;
/// List all managed databases
#[method(name = "listDatabases")]
async fn list_databases(&self) -> RpcResult<Vec<DatabaseInfo>>;
/// Get detailed information about a specific database
#[method(name = "getDatabaseInfo")]
async fn get_database_info(&self, db_id: u64) -> RpcResult<DatabaseInfo>;
/// Delete a database
#[method(name = "deleteDatabase")]
async fn delete_database(&self, db_id: u64) -> RpcResult<bool>;
/// Get server statistics
#[method(name = "getServerStats")]
async fn get_server_stats(&self) -> RpcResult<HashMap<String, serde_json::Value>>;
/// Add an access key to a database
#[method(name = "addAccessKey")]
async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool>;
/// Delete an access key from a database
#[method(name = "deleteAccessKey")]
async fn delete_access_key(&self, db_id: u64, key_hash: String) -> RpcResult<bool>;
/// List all access keys for a database
#[method(name = "listAccessKeys")]
async fn list_access_keys(&self, db_id: u64) -> RpcResult<Vec<AccessKeyInfo>>;
/// Set database public/private status
#[method(name = "setDatabasePublic")]
async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool>;
// ----- Full-text (Tantivy) minimal RPC endpoints -----
/// Create a new FT index in a Tantivy-backed DB
#[method(name = "ftCreate")]
async fn ft_create(
&self,
db_id: u64,
index_name: String,
schema: Vec<(String, String, Vec<String>)>,
) -> RpcResult<bool>;
/// Add or replace a document in an FT index
#[method(name = "ftAdd")]
async fn ft_add(
&self,
db_id: u64,
index_name: String,
doc_id: String,
score: f64,
fields: HashMap<String, String>,
) -> RpcResult<bool>;
/// Search an FT index
#[method(name = "ftSearch")]
async fn ft_search(
&self,
db_id: u64,
index_name: String,
query: String,
filters: Option<Vec<(String, String)>>,
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
) -> RpcResult<serde_json::Value>;
/// Delete a document by id from an FT index
#[method(name = "ftDel")]
async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool>;
/// Get FT index info
#[method(name = "ftInfo")]
async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value>;
/// Drop an FT index
#[method(name = "ftDrop")]
async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool>;
}
/// RPC Server implementation
pub struct RpcServerImpl {
/// Base directory for database files
base_dir: String,
/// Managed database servers
servers: Arc<RwLock<HashMap<u64, Arc<Server>>>>,
/// Default backend type
backend: crate::options::BackendType,
/// Admin secret used to encrypt DB 0 and authorize admin access
admin_secret: String,
}
impl RpcServerImpl {
/// Create a new RPC server instance
pub fn new(base_dir: String, backend: crate::options::BackendType, admin_secret: String) -> Self {
Self {
base_dir,
servers: Arc::new(RwLock::new(HashMap::new())),
backend,
admin_secret,
}
}
/// Get or create a server instance for the given database ID
async fn get_or_create_server(&self, db_id: u64) -> Result<Arc<Server>, jsonrpsee::types::ErrorObjectOwned> {
// Check if server already exists
{
let servers = self.servers.read().await;
if let Some(server) = servers.get(&db_id) {
return Ok(server.clone());
}
}
// Validate existence via admin DB 0 (metadata), not filesystem presence
let exists = admin_meta::db_exists(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
if !exists {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(
-32000,
format!("Database {} not found", db_id),
None::<()>
));
}
// Resolve effective backend for this db from admin meta or filesystem; fallback to default
let meta_backend = admin_meta::get_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
.ok()
.flatten();
let db_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}.db", db_id));
let sniffed_backend = if db_path.exists() {
if db_path.is_file() {
Some(crate::options::BackendType::Redb)
} else if db_path.is_dir() {
Some(crate::options::BackendType::Sled)
} else {
None
}
} else {
None
};
let effective_backend = meta_backend.clone().or(sniffed_backend).unwrap_or(self.backend.clone());
if effective_backend != self.backend {
eprintln!(
"notice: get_or_create_server: db {} backend resolved to {:?} (server default {:?})",
db_id, effective_backend, self.backend
);
}
// If we had to sniff (no meta), persist the resolved backend
if meta_backend.is_none() {
let _ = admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, effective_backend.clone());
}
// Create server instance with resolved backend
let is_tantivy = matches!(effective_backend, crate::options::BackendType::Tantivy);
let db_option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
debug: false,
encryption_key: None,
encrypt: false,
backend: effective_backend.clone(),
admin_secret: self.admin_secret.clone(),
};
let mut server = Server::new(db_option).await;
// Set the selected database to the db_id
server.selected_db = db_id;
// Lazily open/create physical storage according to admin meta (per-db encryption)
// Skip for Tantivy backend (no KV storage to open)
if !is_tantivy {
let _ = server.current_storage();
}
// Store the server
let mut servers = self.servers.write().await;
servers.insert(db_id, Arc::new(server.clone()));
Ok(Arc::new(server))
}
/// Discover existing database IDs from admin DB 0
async fn discover_databases(&self) -> Vec<u64> {
admin_meta::list_dbs(&self.base_dir, self.backend.clone(), &self.admin_secret)
.unwrap_or_default()
}
/// Build database file path for given server/db_id
fn db_file_path(&self, server: &Server, db_id: u64) -> std::path::PathBuf {
std::path::PathBuf::from(&server.option.dir).join(format!("{}.db", db_id))
}
/// Recursively compute size on disk for the database path
fn compute_size_on_disk(&self, path: &std::path::Path) -> Option<u64> {
fn dir_size(p: &std::path::Path) -> u64 {
if p.is_file() {
std::fs::metadata(p).map(|m| m.len()).unwrap_or(0)
} else if p.is_dir() {
let mut total = 0u64;
if let Ok(read) = std::fs::read_dir(p) {
for entry in read.flatten() {
total += dir_size(&entry.path());
}
}
total
} else {
0
}
}
Some(dir_size(path))
}
/// Extract created and last access times (secs) from a path, with fallbacks
fn get_file_times_secs(path: &std::path::Path) -> (u64, Option<u64>) {
let now = std::time::SystemTime::now();
let created = std::fs::metadata(path)
.and_then(|m| m.created().or_else(|_| m.modified()))
.unwrap_or(now)
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let last_access = std::fs::metadata(path)
.and_then(|m| m.accessed())
.ok()
.and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok().map(|d| d.as_secs()));
(created, last_access)
}
/// Compose a DatabaseInfo by probing storage and filesystem, with admin meta for access key count
async fn build_database_info(&self, db_id: u64, server: &Server) -> DatabaseInfo {
// Probe storage to determine encryption state
let storage = server.current_storage().ok();
let encrypted = storage.as_ref().map(|s| s.is_encrypted()).unwrap_or(server.option.encrypt);
// Get actual key count from storage
let key_count = storage.as_ref()
.and_then(|s| s.dbsize().ok())
.map(|count| count as u64);
// Get database name from admin meta
let name = admin_meta::get_database_name(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
.ok()
.flatten();
// Compute size on disk and timestamps from the DB file path
let db_path = self.db_file_path(server, db_id);
let size_on_disk = self.compute_size_on_disk(&db_path);
let (created_at, last_access) = Self::get_file_times_secs(&db_path);
let backend = match server.option.backend {
crate::options::BackendType::Redb => BackendType::Redb,
crate::options::BackendType::Sled => BackendType::Sled,
crate::options::BackendType::Tantivy => BackendType::Tantivy,
};
DatabaseInfo {
id: db_id,
name,
backend,
encrypted,
redis_version: Some("7.0".to_string()),
storage_path: Some(server.option.dir.clone()),
size_on_disk,
key_count,
created_at,
last_access,
}
}
}
#[jsonrpsee::core::async_trait]
impl RpcServer for RpcServerImpl {
async fn create_database(
&self,
backend: BackendType,
config: DatabaseConfig,
encryption_key: Option<String>,
) -> RpcResult<u64> {
// Allocate new ID via admin DB 0
let db_id = admin_meta::allocate_next_id(&self.base_dir, self.backend.clone(), &self.admin_secret)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
// Persist per-db encryption key in admin DB 0 if provided
if let Some(ref key) = encryption_key {
admin_meta::set_enc_key(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, key)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
}
// Persist database name if provided
if let Some(ref name) = config.name {
admin_meta::set_database_name(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, name)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
}
// Ensure base dir exists
if let Err(e) = std::fs::create_dir_all(&self.base_dir) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, format!("Failed to ensure base dir: {}", e), None::<()>));
}
// Map RPC backend to options backend and persist it in admin meta for this db id
let opt_backend = match backend {
BackendType::Redb => crate::options::BackendType::Redb,
BackendType::Sled => crate::options::BackendType::Sled,
BackendType::Tantivy => crate::options::BackendType::Tantivy,
};
admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, opt_backend.clone())
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
// Create server instance using base_dir, chosen backend and admin secret
let is_tantivy_new = matches!(opt_backend, crate::options::BackendType::Tantivy);
let option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
debug: false,
encryption_key: None, // per-db key is stored in admin DB 0
encrypt: false, // encryption decided per-db at open time
backend: opt_backend.clone(),
admin_secret: self.admin_secret.clone(),
};
let mut server = Server::new(option).await;
server.selected_db = db_id;
// Initialize storage to create physical <id>.db with proper encryption from admin meta
// Skip for Tantivy backend (no KV storage to initialize)
if !is_tantivy_new {
let _ = server.current_storage();
}
// Store the server in cache
let mut servers = self.servers.write().await;
servers.insert(db_id, Arc::new(server));
Ok(db_id)
}
async fn set_encryption(&self, _db_id: u64, _encryption_key: String) -> RpcResult<bool> {
// For now, return false as encryption can only be set during creation
let _servers = self.servers.read().await;
// TODO: Implement encryption setting for existing databases
Ok(false)
}
async fn list_databases(&self) -> RpcResult<Vec<DatabaseInfo>> {
let db_ids = self.discover_databases().await;
let mut result = Vec::new();
for db_id in db_ids {
if let Ok(server) = self.get_or_create_server(db_id).await {
// Build accurate info from storage/meta/fs
let info = self.build_database_info(db_id, &server).await;
result.push(info);
}
}
Ok(result)
}
async fn get_database_info(&self, db_id: u64) -> RpcResult<DatabaseInfo> {
let server = self.get_or_create_server(db_id).await?;
// Build accurate info from storage/meta/fs
let info = self.build_database_info(db_id, &server).await;
Ok(info)
}
async fn delete_database(&self, db_id: u64) -> RpcResult<bool> {
let mut servers = self.servers.write().await;
if let Some(_server) = servers.remove(&db_id) {
// Clean up database files
let db_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}.db", db_id));
if db_path.exists() {
if db_path.is_dir() {
std::fs::remove_dir_all(&db_path).ok();
} else {
std::fs::remove_file(&db_path).ok();
}
}
Ok(true)
} else {
Ok(false)
}
}
async fn get_server_stats(&self) -> RpcResult<HashMap<String, serde_json::Value>> {
let db_ids = self.discover_databases().await;
let mut stats = HashMap::new();
stats.insert("total_databases".to_string(), serde_json::json!(db_ids.len()));
stats.insert("uptime".to_string(), serde_json::json!(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs()
));
Ok(stats)
}
// ----- Full-text (Tantivy) minimal RPC endpoints -----
async fn ft_create(
&self,
db_id: u64,
index_name: String,
schema: Vec<(String, String, Vec<String>)>,
) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_create_cmd(&*server, index_name, schema)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn ft_add(
&self,
db_id: u64,
index_name: String,
doc_id: String,
score: f64,
fields: HashMap<String, String>,
) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_add_cmd(&*server, index_name, doc_id, score, fields)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn ft_search(
&self,
db_id: u64,
index_name: String,
query: String,
filters: Option<Vec<(String, String)>>,
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
) -> RpcResult<serde_json::Value> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
let proto = crate::search_cmd::ft_search_cmd(
&*server,
index_name,
query,
filters.unwrap_or_default(),
limit,
offset,
return_fields,
)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(serde_json::json!({ "resp": proto.encode() }))
}
async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_del_cmd(&*server, index_name, doc_id)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
let proto = crate::search_cmd::ft_info_cmd(&*server, index_name)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(serde_json::json!({ "resp": proto.encode() }))
}
async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_drop_cmd(&*server, index_name)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool> {
let perms = match permissions.to_lowercase().as_str() {
"read" => Permissions::Read,
"readwrite" => Permissions::ReadWrite,
_ => return Err(jsonrpsee::types::ErrorObjectOwned::owned(
-32000,
"Invalid permissions: use 'read' or 'readwrite'",
None::<()>
)),
};
admin_meta::add_access_key(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, &key, perms)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn delete_access_key(&self, db_id: u64, key_hash: String) -> RpcResult<bool> {
let ok = admin_meta::delete_access_key(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, &key_hash)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(ok)
}
async fn list_access_keys(&self, db_id: u64) -> RpcResult<Vec<AccessKeyInfo>> {
let pairs = admin_meta::list_access_keys(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
let keys: Vec<AccessKeyInfo> = pairs.into_iter().map(|(hash, perm, ts)| AccessKeyInfo {
hash,
permissions: perm,
created_at: ts,
}).collect();
Ok(keys)
}
async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool> {
admin_meta::set_database_public(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, public)
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
}

49
src/rpc_server.rs Normal file
View File

@@ -0,0 +1,49 @@
use std::net::SocketAddr;
use jsonrpsee::server::{ServerBuilder, ServerHandle};
use jsonrpsee::RpcModule;
use crate::rpc::{RpcServer, RpcServerImpl};
/// Start the RPC server on the specified address
pub async fn start_rpc_server(addr: SocketAddr, base_dir: String, backend: crate::options::BackendType, admin_secret: String) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> {
// Create the RPC server implementation
let rpc_impl = RpcServerImpl::new(base_dir, backend, admin_secret);
// Create the RPC module
let mut module = RpcModule::new(());
module.merge(RpcServer::into_rpc(rpc_impl))?;
// Build the server with both HTTP and WebSocket support
let server = ServerBuilder::default()
.build(addr)
.await?;
// Start the server
let handle = server.start(module);
println!("RPC server started on {}", addr);
Ok(handle)
}
#[cfg(test)]
mod tests {
use super::*;
use std::time::Duration;
#[tokio::test]
async fn test_rpc_server_startup() {
let addr = "127.0.0.1:0".parse().unwrap(); // Use port 0 for auto-assignment
let base_dir = "/tmp/test_rpc".to_string();
let backend = crate::options::BackendType::Redb; // Default for test
let handle = start_rpc_server(addr, base_dir, backend, "test-admin".to_string()).await.unwrap();
// Give the server a moment to start
tokio::time::sleep(Duration::from_millis(100)).await;
// Stop the server
handle.stop().unwrap();
handle.stopped().await;
}
}

352
src/search_cmd.rs Normal file
View File

@@ -0,0 +1,352 @@
use crate::{
error::DBError,
protocol::Protocol,
server::Server,
tantivy_search::{
FieldDef, Filter, FilterType, IndexConfig, NumericType, SearchOptions, TantivySearch,
},
};
use std::collections::HashMap;
use std::sync::Arc;
pub async fn ft_create_cmd(
server: &Server,
index_name: String,
schema: Vec<(String, String, Vec<String>)>,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
// Parse schema into field definitions
let mut field_definitions = Vec::new();
for (field_name, field_type, options) in schema {
let field_def = match field_type.to_uppercase().as_str() {
"TEXT" => {
let mut sortable = false;
let mut no_index = false;
// Weight is not used in current implementation
let mut _weight = 1.0f32;
let mut i = 0;
while i < options.len() {
match options[i].to_uppercase().as_str() {
"WEIGHT" => {
if i + 1 < options.len() {
_weight = options[i + 1].parse::<f32>().unwrap_or(1.0);
i += 2;
continue;
}
}
"SORTABLE" => {
sortable = true;
}
"NOINDEX" => {
no_index = true;
}
_ => {}
}
i += 1;
}
FieldDef::Text {
stored: true,
indexed: !no_index,
tokenized: true,
fast: sortable,
}
}
"NUMERIC" => {
// default to F64
let mut sortable = false;
for opt in &options {
if opt.to_uppercase() == "SORTABLE" {
sortable = true;
}
}
FieldDef::Numeric {
stored: true,
indexed: true,
fast: sortable,
precision: NumericType::F64,
}
}
"TAG" => {
let mut separator = ",".to_string();
let mut case_sensitive = false;
let mut i = 0;
while i < options.len() {
match options[i].to_uppercase().as_str() {
"SEPARATOR" => {
if i + 1 < options.len() {
separator = options[i + 1].clone();
i += 2;
continue;
}
}
"CASESENSITIVE" => {
case_sensitive = true;
}
_ => {}
}
i += 1;
}
FieldDef::Tag {
stored: true,
separator,
case_sensitive,
}
}
"GEO" => FieldDef::Geo { stored: true },
_ => {
return Err(DBError(format!("Unknown field type: {}", field_type)));
}
};
field_definitions.push((field_name, field_def));
}
// Create the search index
let search_path = server.search_index_path();
let config = IndexConfig::default();
let search_index = TantivySearch::new_with_schema(
search_path,
index_name.clone(),
field_definitions,
Some(config),
)?;
// Store in registry
let mut indexes = server.search_indexes.write().unwrap();
indexes.insert(index_name, Arc::new(search_index));
Ok(Protocol::SimpleString("OK".to_string()))
}
pub async fn ft_add_cmd(
server: &Server,
index_name: String,
doc_id: String,
_score: f64,
fields: HashMap<String, String>,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
search_index.add_document_with_fields(&doc_id, fields)?;
Ok(Protocol::SimpleString("OK".to_string()))
}
pub async fn ft_search_cmd(
server: &Server,
index_name: String,
query: String,
filters: Vec<(String, String)>,
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
let search_filters = filters
.into_iter()
.map(|(field, value)| Filter {
field,
filter_type: FilterType::Equals(value),
})
.collect();
let options = SearchOptions {
limit: limit.unwrap_or(10),
offset: offset.unwrap_or(0),
filters: search_filters,
sort_by: None,
return_fields,
highlight: false,
};
let results = search_index.search_with_options(&query, options)?;
// Format results as Redis protocol
let mut response = Vec::new();
// First element is the total count
response.push(Protocol::SimpleString(results.total.to_string()));
// Then each document
for doc in results.documents {
let mut doc_array = Vec::new();
// Add document ID if it exists
if let Some(id) = doc.fields.get("_id") {
doc_array.push(Protocol::BulkString(id.clone()));
}
// Add score
doc_array.push(Protocol::BulkString(doc.score.to_string()));
// Add fields as key-value pairs
for (field_name, field_value) in doc.fields {
if field_name != "_id" {
doc_array.push(Protocol::BulkString(field_name));
doc_array.push(Protocol::BulkString(field_value));
}
}
response.push(Protocol::Array(doc_array));
}
Ok(Protocol::Array(response))
}
pub async fn ft_del_cmd(
server: &Server,
index_name: String,
doc_id: String,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let _search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
// Not fully implemented yet: Tantivy delete by term would require a writer session and commit coordination.
println!("Deleting document '{}' from index '{}'", doc_id, index_name);
Ok(Protocol::SimpleString("1".to_string()))
}
pub async fn ft_info_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
let info = search_index.get_info()?;
// Format info as Redis protocol
let mut response = Vec::new();
response.push(Protocol::BulkString("index_name".to_string()));
response.push(Protocol::BulkString(info.name));
response.push(Protocol::BulkString("num_docs".to_string()));
response.push(Protocol::BulkString(info.num_docs.to_string()));
response.push(Protocol::BulkString("num_fields".to_string()));
response.push(Protocol::BulkString(info.fields.len().to_string()));
response.push(Protocol::BulkString("fields".to_string()));
let fields_str = info
.fields
.iter()
.map(|f| format!("{}:{}", f.name, f.field_type))
.collect::<Vec<_>>()
.join(", ");
response.push(Protocol::BulkString(fields_str));
Ok(Protocol::Array(response))
}
pub async fn ft_drop_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
// Remove from registry
{
let mut indexes = server.search_indexes.write().unwrap();
indexes.remove(&index_name);
}
// Remove the index files from disk
let index_path = server.search_index_path().join(&index_name);
if index_path.exists() {
std::fs::remove_dir_all(&index_path)
.map_err(|e| DBError(format!("Failed to remove index files: {}", e)))?;
}
Ok(Protocol::SimpleString("OK".to_string()))
}

View File

@@ -11,15 +11,20 @@ use crate::cmd::Cmd;
use crate::error::DBError;
use crate::options;
use crate::protocol::Protocol;
use crate::storage::Storage;
use crate::storage_trait::StorageBackend;
use crate::admin_meta;
#[derive(Clone)]
pub struct Server {
pub db_cache: std::sync::Arc<std::sync::RwLock<HashMap<u64, Arc<Storage>>>>,
pub db_cache: std::sync::Arc<std::sync::RwLock<HashMap<u64, Arc<dyn StorageBackend>>>>,
pub option: options::DBOption,
pub client_name: Option<String>,
pub selected_db: u64, // Changed from usize to u64
pub queued_cmd: Option<Vec<(Cmd, Protocol)>>,
pub current_permissions: Option<crate::rpc::Permissions>,
// In-memory registry of Tantivy search indexes for this server
pub search_indexes: Arc<std::sync::RwLock<HashMap<String, Arc<crate::tantivy_search::TantivySearch>>>>,
// BLPOP waiter registry: per (db_index, key) FIFO of waiters
pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>,
@@ -46,46 +51,64 @@ impl Server {
client_name: None,
selected_db: 0,
queued_cmd: None,
current_permissions: None,
search_indexes: Arc::new(std::sync::RwLock::new(HashMap::new())),
list_waiters: Arc::new(Mutex::new(HashMap::new())),
waiter_seq: Arc::new(AtomicU64::new(1)),
}
}
pub fn current_storage(&self) -> Result<Arc<Storage>, DBError> {
// Path where search indexes are stored, namespaced per selected DB:
// <base_dir>/search_indexes/<db_id>
pub fn search_index_path(&self) -> std::path::PathBuf {
let base = std::path::PathBuf::from(&self.option.dir)
.join("search_indexes")
.join(self.selected_db.to_string());
if !base.exists() {
let _ = std::fs::create_dir_all(&base);
}
base
}
pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> {
let mut cache = self.db_cache.write().unwrap();
if let Some(storage) = cache.get(&self.selected_db) {
return Ok(storage.clone());
}
// Create new database file
let db_file_path = std::path::PathBuf::from(self.option.dir.clone())
.join(format!("{}.db", self.selected_db));
// Ensure the directory exists before creating the database file
if let Some(parent_dir) = db_file_path.parent() {
std::fs::create_dir_all(parent_dir).map_err(|e| {
DBError(format!("Failed to create directory {}: {}", parent_dir.display(), e))
})?;
}
println!("Creating new db file: {}", db_file_path.display());
let storage = Arc::new(Storage::new(
db_file_path,
self.should_encrypt_db(self.selected_db),
self.option.encryption_key.as_deref()
)?);
// Use process-wide shared handles to avoid sled/reDB double-open lock contention.
let storage = if self.selected_db == 0 {
// Admin DB 0: always via singleton
admin_meta::open_admin_storage(
&self.option.dir,
self.option.backend.clone(),
&self.option.admin_secret,
)?
} else {
// Data DBs: via global registry keyed by id
admin_meta::open_data_storage(
&self.option.dir,
self.option.backend.clone(),
&self.option.admin_secret,
self.selected_db,
)?
};
cache.insert(self.selected_db, storage.clone());
Ok(storage)
}
fn should_encrypt_db(&self, db_index: u64) -> bool {
// DB 0-9 are non-encrypted, DB 10+ are encrypted
self.option.encrypt && db_index >= 10
/// Check if current permissions allow read operations
pub fn has_read_permission(&self) -> bool {
matches!(self.current_permissions, Some(crate::rpc::Permissions::Read) | Some(crate::rpc::Permissions::ReadWrite))
}
/// Check if current permissions allow write operations
pub fn has_write_permission(&self) -> bool {
matches!(self.current_permissions, Some(crate::rpc::Permissions::ReadWrite))
}
// ----- BLPOP waiter helpers -----

287
src/storage/mod.rs Normal file
View File

@@ -0,0 +1,287 @@
use std::{
path::Path,
sync::Arc,
time::{SystemTime, UNIX_EPOCH},
};
use redb::{Database, TableDefinition};
use serde::{Deserialize, Serialize};
use crate::crypto::CryptoFactory;
use crate::error::DBError;
// Re-export modules
mod storage_basic;
mod storage_hset;
mod storage_lists;
mod storage_extra;
// Re-export implementations
// Note: These imports are used by the impl blocks in the submodules
// The compiler shows them as unused because they're not directly used in this file
// but they're needed for the Storage struct methods to be available
pub use storage_extra::*;
// Table definitions for different Redis data types
const TYPES_TABLE: TableDefinition<&str, &str> = TableDefinition::new("types");
const STRINGS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("strings");
const HASHES_TABLE: TableDefinition<(&str, &str), &[u8]> = TableDefinition::new("hashes");
const LISTS_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("lists");
const STREAMS_META_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("streams_meta");
const STREAMS_DATA_TABLE: TableDefinition<(&str, &str), &[u8]> = TableDefinition::new("streams_data");
const ENCRYPTED_TABLE: TableDefinition<&str, u8> = TableDefinition::new("encrypted");
const EXPIRATION_TABLE: TableDefinition<&str, u64> = TableDefinition::new("expiration");
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct StreamEntry {
pub fields: Vec<(String, String)>,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ListValue {
pub elements: Vec<String>,
}
#[inline]
pub fn now_in_millis() -> u128 {
let start = SystemTime::now();
let duration_since_epoch = start.duration_since(UNIX_EPOCH).unwrap();
duration_since_epoch.as_millis()
}
pub struct Storage {
db: Database,
crypto: Option<CryptoFactory>,
}
impl Storage {
pub fn new(path: impl AsRef<Path>, should_encrypt: bool, master_key: Option<&str>) -> Result<Self, DBError> {
let db = Database::create(path)?;
// Create tables if they don't exist
let write_txn = db.begin_write()?;
{
let _ = write_txn.open_table(TYPES_TABLE)?;
let _ = write_txn.open_table(STRINGS_TABLE)?;
let _ = write_txn.open_table(HASHES_TABLE)?;
let _ = write_txn.open_table(LISTS_TABLE)?;
let _ = write_txn.open_table(STREAMS_META_TABLE)?;
let _ = write_txn.open_table(STREAMS_DATA_TABLE)?;
let _ = write_txn.open_table(ENCRYPTED_TABLE)?;
let _ = write_txn.open_table(EXPIRATION_TABLE)?;
}
write_txn.commit()?;
// Check if database was previously encrypted
let read_txn = db.begin_read()?;
let encrypted_table = read_txn.open_table(ENCRYPTED_TABLE)?;
let was_encrypted = encrypted_table.get("encrypted")?.map(|v| v.value() == 1).unwrap_or(false);
drop(read_txn);
let crypto = if should_encrypt || was_encrypted {
if let Some(key) = master_key {
Some(CryptoFactory::new(key.as_bytes()))
} else {
return Err(DBError("Encryption requested but no master key provided".to_string()));
}
} else {
None
};
// If we're enabling encryption for the first time, mark it
if should_encrypt && !was_encrypted {
let write_txn = db.begin_write()?;
{
let mut encrypted_table = write_txn.open_table(ENCRYPTED_TABLE)?;
encrypted_table.insert("encrypted", &1u8)?;
}
write_txn.commit()?;
}
Ok(Storage {
db,
crypto,
})
}
pub fn is_encrypted(&self) -> bool {
self.crypto.is_some()
}
// Helper methods for encryption
fn encrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
if let Some(crypto) = &self.crypto {
Ok(crypto.encrypt(data))
} else {
Ok(data.to_vec())
}
}
fn decrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
if let Some(crypto) = &self.crypto {
Ok(crypto.decrypt(data)?)
} else {
Ok(data.to_vec())
}
}
}
use crate::storage_trait::StorageBackend;
impl StorageBackend for Storage {
fn get(&self, key: &str) -> Result<Option<String>, DBError> {
self.get(key)
}
fn set(&self, key: String, value: String) -> Result<(), DBError> {
self.set(key, value)
}
fn setx(&self, key: String, value: String, expire_ms: u128) -> Result<(), DBError> {
self.setx(key, value, expire_ms)
}
fn del(&self, key: String) -> Result<(), DBError> {
self.del(key)
}
fn exists(&self, key: &str) -> Result<bool, DBError> {
self.exists(key)
}
fn keys(&self, pattern: &str) -> Result<Vec<String>, DBError> {
self.keys(pattern)
}
fn dbsize(&self) -> Result<i64, DBError> {
self.dbsize()
}
fn flushdb(&self) -> Result<(), DBError> {
self.flushdb()
}
fn get_key_type(&self, key: &str) -> Result<Option<String>, DBError> {
self.get_key_type(key)
}
fn scan(&self, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError> {
self.scan(cursor, pattern, count)
}
fn hscan(&self, key: &str, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError> {
self.hscan(key, cursor, pattern, count)
}
fn hset(&self, key: &str, pairs: Vec<(String, String)>) -> Result<i64, DBError> {
self.hset(key, pairs)
}
fn hget(&self, key: &str, field: &str) -> Result<Option<String>, DBError> {
self.hget(key, field)
}
fn hgetall(&self, key: &str) -> Result<Vec<(String, String)>, DBError> {
self.hgetall(key)
}
fn hdel(&self, key: &str, fields: Vec<String>) -> Result<i64, DBError> {
self.hdel(key, fields)
}
fn hexists(&self, key: &str, field: &str) -> Result<bool, DBError> {
self.hexists(key, field)
}
fn hkeys(&self, key: &str) -> Result<Vec<String>, DBError> {
self.hkeys(key)
}
fn hvals(&self, key: &str) -> Result<Vec<String>, DBError> {
self.hvals(key)
}
fn hlen(&self, key: &str) -> Result<i64, DBError> {
self.hlen(key)
}
fn hmget(&self, key: &str, fields: Vec<String>) -> Result<Vec<Option<String>>, DBError> {
self.hmget(key, fields)
}
fn hsetnx(&self, key: &str, field: &str, value: &str) -> Result<bool, DBError> {
self.hsetnx(key, field, value)
}
fn lpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError> {
self.lpush(key, elements)
}
fn rpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError> {
self.rpush(key, elements)
}
fn lpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError> {
self.lpop(key, count)
}
fn rpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError> {
self.rpop(key, count)
}
fn llen(&self, key: &str) -> Result<i64, DBError> {
self.llen(key)
}
fn lindex(&self, key: &str, index: i64) -> Result<Option<String>, DBError> {
self.lindex(key, index)
}
fn lrange(&self, key: &str, start: i64, stop: i64) -> Result<Vec<String>, DBError> {
self.lrange(key, start, stop)
}
fn ltrim(&self, key: &str, start: i64, stop: i64) -> Result<(), DBError> {
self.ltrim(key, start, stop)
}
fn lrem(&self, key: &str, count: i64, element: &str) -> Result<i64, DBError> {
self.lrem(key, count, element)
}
fn ttl(&self, key: &str) -> Result<i64, DBError> {
self.ttl(key)
}
fn expire_seconds(&self, key: &str, secs: u64) -> Result<bool, DBError> {
self.expire_seconds(key, secs)
}
fn pexpire_millis(&self, key: &str, ms: u128) -> Result<bool, DBError> {
self.pexpire_millis(key, ms)
}
fn persist(&self, key: &str) -> Result<bool, DBError> {
self.persist(key)
}
fn expire_at_seconds(&self, key: &str, ts_secs: i64) -> Result<bool, DBError> {
self.expire_at_seconds(key, ts_secs)
}
fn pexpire_at_millis(&self, key: &str, ts_ms: i64) -> Result<bool, DBError> {
self.pexpire_at_millis(key, ts_ms)
}
fn is_encrypted(&self) -> bool {
self.is_encrypted()
}
fn info(&self) -> Result<Vec<(String, String)>, DBError> {
self.info()
}
fn clone_arc(&self) -> Arc<dyn StorageBackend> {
unimplemented!("Storage cloning not yet implemented for redb backend")
}
}

View File

@@ -208,6 +208,14 @@ impl Storage {
write_txn.commit()?;
Ok(applied)
}
pub fn info(&self) -> Result<Vec<(String, String)>, DBError> {
let dbsize = self.dbsize()?;
Ok(vec![
("db_size".to_string(), dbsize.to_string()),
("is_encrypted".to_string(), self.is_encrypted().to_string()),
])
}
}
// Utility function for glob pattern matching

845
src/storage_sled/mod.rs Normal file
View File

@@ -0,0 +1,845 @@
// src/storage_sled/mod.rs
use std::path::Path;
use std::sync::Arc;
use std::collections::HashMap;
use std::time::{SystemTime, UNIX_EPOCH};
use serde::{Deserialize, Serialize};
use crate::error::DBError;
use crate::storage_trait::StorageBackend;
use crate::crypto::CryptoFactory;
#[derive(Serialize, Deserialize, Debug, Clone)]
enum ValueType {
String(String),
Hash(HashMap<String, String>),
List(Vec<String>),
}
#[derive(Serialize, Deserialize, Debug, Clone)]
struct StorageValue {
value: ValueType,
expires_at: Option<u128>, // milliseconds since epoch
}
pub struct SledStorage {
db: sled::Db,
types: sled::Tree,
crypto: Option<CryptoFactory>,
}
impl SledStorage {
pub fn new(path: impl AsRef<Path>, should_encrypt: bool, master_key: Option<&str>) -> Result<Self, DBError> {
let db = sled::open(path).map_err(|e| DBError(format!("Failed to open sled: {}", e)))?;
let types = db.open_tree("types").map_err(|e| DBError(format!("Failed to open types tree: {}", e)))?;
// Check if database was previously encrypted
let encrypted_tree = db.open_tree("encrypted").map_err(|e| DBError(e.to_string()))?;
let was_encrypted = encrypted_tree.get("encrypted")
.map_err(|e| DBError(e.to_string()))?
.map(|v| v[0] == 1)
.unwrap_or(false);
let crypto = if should_encrypt || was_encrypted {
if let Some(key) = master_key {
Some(CryptoFactory::new(key.as_bytes()))
} else {
return Err(DBError("Encryption requested but no master key provided".to_string()));
}
} else {
None
};
// Mark database as encrypted if enabling encryption
if should_encrypt && !was_encrypted {
encrypted_tree.insert("encrypted", &[1u8])
.map_err(|e| DBError(e.to_string()))?;
encrypted_tree.flush().map_err(|e| DBError(e.to_string()))?;
}
Ok(SledStorage { db, types, crypto })
}
fn now_millis() -> u128 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_millis()
}
fn encrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
if let Some(crypto) = &self.crypto {
Ok(crypto.encrypt(data))
} else {
Ok(data.to_vec())
}
}
fn decrypt_if_needed(&self, data: &[u8]) -> Result<Vec<u8>, DBError> {
if let Some(crypto) = &self.crypto {
Ok(crypto.decrypt(data)?)
} else {
Ok(data.to_vec())
}
}
fn get_storage_value(&self, key: &str) -> Result<Option<StorageValue>, DBError> {
match self.db.get(key).map_err(|e| DBError(e.to_string()))? {
Some(encrypted_data) => {
let decrypted = self.decrypt_if_needed(&encrypted_data)?;
let storage_val: StorageValue = bincode::deserialize(&decrypted)
.map_err(|e| DBError(format!("Deserialization error: {}", e)))?;
// Check expiration
if let Some(expires_at) = storage_val.expires_at {
if Self::now_millis() > expires_at {
// Expired, remove it
self.db.remove(key).map_err(|e| DBError(e.to_string()))?;
self.types.remove(key).map_err(|e| DBError(e.to_string()))?;
return Ok(None);
}
}
Ok(Some(storage_val))
}
None => Ok(None)
}
}
fn set_storage_value(&self, key: &str, storage_val: StorageValue) -> Result<(), DBError> {
let data = bincode::serialize(&storage_val)
.map_err(|e| DBError(format!("Serialization error: {}", e)))?;
let encrypted = self.encrypt_if_needed(&data)?;
self.db.insert(key, encrypted).map_err(|e| DBError(e.to_string()))?;
// Store type info (unencrypted for efficiency)
let type_str = match &storage_val.value {
ValueType::String(_) => "string",
ValueType::Hash(_) => "hash",
ValueType::List(_) => "list",
};
self.types.insert(key, type_str.as_bytes()).map_err(|e| DBError(e.to_string()))?;
Ok(())
}
fn glob_match(pattern: &str, text: &str) -> bool {
if pattern == "*" {
return true;
}
let pattern_chars: Vec<char> = pattern.chars().collect();
let text_chars: Vec<char> = text.chars().collect();
fn match_recursive(pattern: &[char], text: &[char], pi: usize, ti: usize) -> bool {
if pi >= pattern.len() {
return ti >= text.len();
}
if ti >= text.len() {
return pattern[pi..].iter().all(|&c| c == '*');
}
match pattern[pi] {
'*' => {
for i in ti..=text.len() {
if match_recursive(pattern, text, pi + 1, i) {
return true;
}
}
false
}
'?' => match_recursive(pattern, text, pi + 1, ti + 1),
c => {
if text[ti] == c {
match_recursive(pattern, text, pi + 1, ti + 1)
} else {
false
}
}
}
}
match_recursive(&pattern_chars, &text_chars, 0, 0)
}
}
impl StorageBackend for SledStorage {
fn get(&self, key: &str) -> Result<Option<String>, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::String(s) => Ok(Some(s)),
_ => Ok(None)
}
None => Ok(None)
}
}
fn set(&self, key: String, value: String) -> Result<(), DBError> {
let storage_val = StorageValue {
value: ValueType::String(value),
expires_at: None,
};
self.set_storage_value(&key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(())
}
fn setx(&self, key: String, value: String, expire_ms: u128) -> Result<(), DBError> {
let storage_val = StorageValue {
value: ValueType::String(value),
expires_at: Some(Self::now_millis() + expire_ms),
};
self.set_storage_value(&key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(())
}
fn del(&self, key: String) -> Result<(), DBError> {
self.db.remove(&key).map_err(|e| DBError(e.to_string()))?;
self.types.remove(&key).map_err(|e| DBError(e.to_string()))?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(())
}
fn exists(&self, key: &str) -> Result<bool, DBError> {
// Check with expiration
Ok(self.get_storage_value(key)?.is_some())
}
fn keys(&self, pattern: &str) -> Result<Vec<String>, DBError> {
let mut keys = Vec::new();
for item in self.types.iter() {
let (key_bytes, _) = item.map_err(|e| DBError(e.to_string()))?;
let key = String::from_utf8_lossy(&key_bytes).to_string();
// Check if key is expired
if self.get_storage_value(&key)?.is_some() {
if Self::glob_match(pattern, &key) {
keys.push(key);
}
}
}
Ok(keys)
}
fn scan(&self, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError> {
let mut result = Vec::new();
let mut current_cursor = 0u64;
let limit = count.unwrap_or(10) as usize;
for item in self.types.iter() {
if current_cursor >= cursor {
let (key_bytes, type_bytes) = item.map_err(|e| DBError(e.to_string()))?;
let key = String::from_utf8_lossy(&key_bytes).to_string();
// Check pattern match
let matches = if let Some(pat) = pattern {
Self::glob_match(pat, &key)
} else {
true
};
if matches {
// Check if key is expired and get value
if let Some(storage_val) = self.get_storage_value(&key)? {
let value = match storage_val.value {
ValueType::String(s) => s,
_ => String::from_utf8_lossy(&type_bytes).to_string(),
};
result.push((key, value));
if result.len() >= limit {
current_cursor += 1;
break;
}
}
}
}
current_cursor += 1;
}
let next_cursor = if result.len() < limit { 0 } else { current_cursor };
Ok((next_cursor, result))
}
fn dbsize(&self) -> Result<i64, DBError> {
let mut count = 0i64;
for item in self.types.iter() {
let (key_bytes, _) = item.map_err(|e| DBError(e.to_string()))?;
let key = String::from_utf8_lossy(&key_bytes).to_string();
if self.get_storage_value(&key)?.is_some() {
count += 1;
}
}
Ok(count)
}
fn flushdb(&self) -> Result<(), DBError> {
self.db.clear().map_err(|e| DBError(e.to_string()))?;
self.types.clear().map_err(|e| DBError(e.to_string()))?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(())
}
fn get_key_type(&self, key: &str) -> Result<Option<String>, DBError> {
// First check if key exists (handles expiration)
if self.get_storage_value(key)?.is_some() {
match self.types.get(key).map_err(|e| DBError(e.to_string()))? {
Some(data) => Ok(Some(String::from_utf8_lossy(&data).to_string())),
None => Ok(None)
}
} else {
Ok(None)
}
}
// Hash operations
fn hset(&self, key: &str, pairs: Vec<(String, String)>) -> Result<i64, DBError> {
let mut storage_val = self.get_storage_value(key)?.unwrap_or(StorageValue {
value: ValueType::Hash(HashMap::new()),
expires_at: None,
});
let hash = match &mut storage_val.value {
ValueType::Hash(h) => h,
_ => return Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string())),
};
let mut new_fields = 0i64;
for (field, value) in pairs {
if !hash.contains_key(&field) {
new_fields += 1;
}
hash.insert(field, value);
}
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(new_fields)
}
fn hget(&self, key: &str, field: &str) -> Result<Option<String>, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::Hash(h) => Ok(h.get(field).cloned()),
_ => Ok(None)
}
None => Ok(None)
}
}
fn hgetall(&self, key: &str) -> Result<Vec<(String, String)>, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::Hash(h) => Ok(h.into_iter().collect()),
_ => Ok(Vec::new())
}
None => Ok(Vec::new())
}
}
fn hscan(&self, key: &str, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::Hash(h) => {
let mut result = Vec::new();
let mut current_cursor = 0u64;
let limit = count.unwrap_or(10) as usize;
for (field, value) in h.iter() {
if current_cursor >= cursor {
let matches = if let Some(pat) = pattern {
Self::glob_match(pat, field)
} else {
true
};
if matches {
result.push((field.clone(), value.clone()));
if result.len() >= limit {
current_cursor += 1;
break;
}
}
}
current_cursor += 1;
}
let next_cursor = if result.len() < limit { 0 } else { current_cursor };
Ok((next_cursor, result))
}
_ => Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string()))
}
None => Ok((0, Vec::new()))
}
}
fn hdel(&self, key: &str, fields: Vec<String>) -> Result<i64, DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(0)
};
let hash = match &mut storage_val.value {
ValueType::Hash(h) => h,
_ => return Ok(0)
};
let mut deleted = 0i64;
for field in fields {
if hash.remove(&field).is_some() {
deleted += 1;
}
}
if hash.is_empty() {
self.del(key.to_string())?;
} else {
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
}
Ok(deleted)
}
fn hexists(&self, key: &str, field: &str) -> Result<bool, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::Hash(h) => Ok(h.contains_key(field)),
_ => Ok(false)
}
None => Ok(false)
}
}
fn hkeys(&self, key: &str) -> Result<Vec<String>, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::Hash(h) => Ok(h.keys().cloned().collect()),
_ => Ok(Vec::new())
}
None => Ok(Vec::new())
}
}
fn hvals(&self, key: &str) -> Result<Vec<String>, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::Hash(h) => Ok(h.values().cloned().collect()),
_ => Ok(Vec::new())
}
None => Ok(Vec::new())
}
}
fn hlen(&self, key: &str) -> Result<i64, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::Hash(h) => Ok(h.len() as i64),
_ => Ok(0)
}
None => Ok(0)
}
}
fn hmget(&self, key: &str, fields: Vec<String>) -> Result<Vec<Option<String>>, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::Hash(h) => {
Ok(fields.into_iter().map(|f| h.get(&f).cloned()).collect())
}
_ => Ok(fields.into_iter().map(|_| None).collect())
}
None => Ok(fields.into_iter().map(|_| None).collect())
}
}
fn hsetnx(&self, key: &str, field: &str, value: &str) -> Result<bool, DBError> {
let mut storage_val = self.get_storage_value(key)?.unwrap_or(StorageValue {
value: ValueType::Hash(HashMap::new()),
expires_at: None,
});
let hash = match &mut storage_val.value {
ValueType::Hash(h) => h,
_ => return Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string())),
};
if hash.contains_key(field) {
Ok(false)
} else {
hash.insert(field.to_string(), value.to_string());
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(true)
}
}
// List operations
fn lpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError> {
let mut storage_val = self.get_storage_value(key)?.unwrap_or(StorageValue {
value: ValueType::List(Vec::new()),
expires_at: None,
});
let list = match &mut storage_val.value {
ValueType::List(l) => l,
_ => return Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string())),
};
for element in elements.into_iter().rev() {
list.insert(0, element);
}
let len = list.len() as i64;
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(len)
}
fn rpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError> {
let mut storage_val = self.get_storage_value(key)?.unwrap_or(StorageValue {
value: ValueType::List(Vec::new()),
expires_at: None,
});
let list = match &mut storage_val.value {
ValueType::List(l) => l,
_ => return Err(DBError("WRONGTYPE Operation against a key holding the wrong kind of value".to_string())),
};
list.extend(elements);
let len = list.len() as i64;
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(len)
}
fn lpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(Vec::new())
};
let list = match &mut storage_val.value {
ValueType::List(l) => l,
_ => return Ok(Vec::new())
};
let mut result = Vec::new();
for _ in 0..count.min(list.len() as u64) {
if let Some(elem) = list.first() {
result.push(elem.clone());
list.remove(0);
}
}
if list.is_empty() {
self.del(key.to_string())?;
} else {
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
}
Ok(result)
}
fn rpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(Vec::new())
};
let list = match &mut storage_val.value {
ValueType::List(l) => l,
_ => return Ok(Vec::new())
};
let mut result = Vec::new();
for _ in 0..count.min(list.len() as u64) {
if let Some(elem) = list.pop() {
result.push(elem);
}
}
if list.is_empty() {
self.del(key.to_string())?;
} else {
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
}
Ok(result)
}
fn llen(&self, key: &str) -> Result<i64, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::List(l) => Ok(l.len() as i64),
_ => Ok(0)
}
None => Ok(0)
}
}
fn lindex(&self, key: &str, index: i64) -> Result<Option<String>, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::List(list) => {
let actual_index = if index < 0 {
list.len() as i64 + index
} else {
index
};
if actual_index >= 0 && (actual_index as usize) < list.len() {
Ok(Some(list[actual_index as usize].clone()))
} else {
Ok(None)
}
}
_ => Ok(None)
}
None => Ok(None)
}
}
fn lrange(&self, key: &str, start: i64, stop: i64) -> Result<Vec<String>, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => match storage_val.value {
ValueType::List(list) => {
if list.is_empty() {
return Ok(Vec::new());
}
let len = list.len() as i64;
let start_idx = if start < 0 {
std::cmp::max(0, len + start)
} else {
std::cmp::min(start, len)
};
let stop_idx = if stop < 0 {
std::cmp::max(-1, len + stop)
} else {
std::cmp::min(stop, len - 1)
};
if start_idx > stop_idx || start_idx >= len {
return Ok(Vec::new());
}
let start_usize = start_idx as usize;
let stop_usize = (stop_idx + 1) as usize;
Ok(list[start_usize..std::cmp::min(stop_usize, list.len())].to_vec())
}
_ => Ok(Vec::new())
}
None => Ok(Vec::new())
}
}
fn ltrim(&self, key: &str, start: i64, stop: i64) -> Result<(), DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(())
};
let list = match &mut storage_val.value {
ValueType::List(l) => l,
_ => return Ok(())
};
if list.is_empty() {
return Ok(());
}
let len = list.len() as i64;
let start_idx = if start < 0 {
std::cmp::max(0, len + start)
} else {
std::cmp::min(start, len)
};
let stop_idx = if stop < 0 {
std::cmp::max(-1, len + stop)
} else {
std::cmp::min(stop, len - 1)
};
if start_idx > stop_idx || start_idx >= len {
self.del(key.to_string())?;
} else {
let start_usize = start_idx as usize;
let stop_usize = (stop_idx + 1) as usize;
*list = list[start_usize..std::cmp::min(stop_usize, list.len())].to_vec();
if list.is_empty() {
self.del(key.to_string())?;
} else {
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
}
}
Ok(())
}
fn lrem(&self, key: &str, count: i64, element: &str) -> Result<i64, DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(0)
};
let list = match &mut storage_val.value {
ValueType::List(l) => l,
_ => return Ok(0)
};
let mut removed = 0i64;
if count == 0 {
// Remove all occurrences
let original_len = list.len();
list.retain(|x| x != element);
removed = (original_len - list.len()) as i64;
} else if count > 0 {
// Remove first count occurrences
let mut to_remove = count as usize;
list.retain(|x| {
if x == element && to_remove > 0 {
to_remove -= 1;
removed += 1;
false
} else {
true
}
});
} else {
// Remove last |count| occurrences
let mut to_remove = (-count) as usize;
for i in (0..list.len()).rev() {
if list[i] == element && to_remove > 0 {
list.remove(i);
to_remove -= 1;
removed += 1;
}
}
}
if list.is_empty() {
self.del(key.to_string())?;
} else {
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
}
Ok(removed)
}
// Expiration
fn ttl(&self, key: &str) -> Result<i64, DBError> {
match self.get_storage_value(key)? {
Some(storage_val) => {
if let Some(expires_at) = storage_val.expires_at {
let now = Self::now_millis();
if now >= expires_at {
Ok(-2) // Key has expired
} else {
Ok(((expires_at - now) / 1000) as i64) // TTL in seconds
}
} else {
Ok(-1) // Key exists but has no expiration
}
}
None => Ok(-2) // Key does not exist
}
}
fn expire_seconds(&self, key: &str, secs: u64) -> Result<bool, DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(false)
};
storage_val.expires_at = Some(Self::now_millis() + (secs as u128) * 1000);
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(true)
}
fn pexpire_millis(&self, key: &str, ms: u128) -> Result<bool, DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(false)
};
storage_val.expires_at = Some(Self::now_millis() + ms);
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(true)
}
fn persist(&self, key: &str) -> Result<bool, DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(false)
};
if storage_val.expires_at.is_some() {
storage_val.expires_at = None;
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(true)
} else {
Ok(false)
}
}
fn expire_at_seconds(&self, key: &str, ts_secs: i64) -> Result<bool, DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(false)
};
let expires_at_ms: u128 = if ts_secs <= 0 { 0 } else { (ts_secs as u128) * 1000 };
storage_val.expires_at = Some(expires_at_ms);
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(true)
}
fn pexpire_at_millis(&self, key: &str, ts_ms: i64) -> Result<bool, DBError> {
let mut storage_val = match self.get_storage_value(key)? {
Some(sv) => sv,
None => return Ok(false)
};
let expires_at_ms: u128 = if ts_ms <= 0 { 0 } else { ts_ms as u128 };
storage_val.expires_at = Some(expires_at_ms);
self.set_storage_value(key, storage_val)?;
self.db.flush().map_err(|e| DBError(e.to_string()))?;
Ok(true)
}
fn is_encrypted(&self) -> bool {
self.crypto.is_some()
}
fn info(&self) -> Result<Vec<(String, String)>, DBError> {
let dbsize = self.dbsize()?;
Ok(vec![
("db_size".to_string(), dbsize.to_string()),
("is_encrypted".to_string(), self.is_encrypted().to_string()),
])
}
fn clone_arc(&self) -> Arc<dyn StorageBackend> {
// Note: This is a simplified clone - in production you might want to
// handle this differently as sled::Db is already Arc internally
Arc::new(SledStorage {
db: self.db.clone(),
types: self.types.clone(),
crypto: self.crypto.clone(),
})
}
}

58
src/storage_trait.rs Normal file
View File

@@ -0,0 +1,58 @@
// src/storage_trait.rs
use crate::error::DBError;
use std::sync::Arc;
pub trait StorageBackend: Send + Sync {
// Basic key operations
fn get(&self, key: &str) -> Result<Option<String>, DBError>;
fn set(&self, key: String, value: String) -> Result<(), DBError>;
fn setx(&self, key: String, value: String, expire_ms: u128) -> Result<(), DBError>;
fn del(&self, key: String) -> Result<(), DBError>;
fn exists(&self, key: &str) -> Result<bool, DBError>;
fn keys(&self, pattern: &str) -> Result<Vec<String>, DBError>;
fn dbsize(&self) -> Result<i64, DBError>;
fn flushdb(&self) -> Result<(), DBError>;
fn get_key_type(&self, key: &str) -> Result<Option<String>, DBError>;
// Scanning
fn scan(&self, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError>;
fn hscan(&self, key: &str, cursor: u64, pattern: Option<&str>, count: Option<u64>) -> Result<(u64, Vec<(String, String)>), DBError>;
// Hash operations
fn hset(&self, key: &str, pairs: Vec<(String, String)>) -> Result<i64, DBError>;
fn hget(&self, key: &str, field: &str) -> Result<Option<String>, DBError>;
fn hgetall(&self, key: &str) -> Result<Vec<(String, String)>, DBError>;
fn hdel(&self, key: &str, fields: Vec<String>) -> Result<i64, DBError>;
fn hexists(&self, key: &str, field: &str) -> Result<bool, DBError>;
fn hkeys(&self, key: &str) -> Result<Vec<String>, DBError>;
fn hvals(&self, key: &str) -> Result<Vec<String>, DBError>;
fn hlen(&self, key: &str) -> Result<i64, DBError>;
fn hmget(&self, key: &str, fields: Vec<String>) -> Result<Vec<Option<String>>, DBError>;
fn hsetnx(&self, key: &str, field: &str, value: &str) -> Result<bool, DBError>;
// List operations
fn lpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError>;
fn rpush(&self, key: &str, elements: Vec<String>) -> Result<i64, DBError>;
fn lpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError>;
fn rpop(&self, key: &str, count: u64) -> Result<Vec<String>, DBError>;
fn llen(&self, key: &str) -> Result<i64, DBError>;
fn lindex(&self, key: &str, index: i64) -> Result<Option<String>, DBError>;
fn lrange(&self, key: &str, start: i64, stop: i64) -> Result<Vec<String>, DBError>;
fn ltrim(&self, key: &str, start: i64, stop: i64) -> Result<(), DBError>;
fn lrem(&self, key: &str, count: i64, element: &str) -> Result<i64, DBError>;
// Expiration
fn ttl(&self, key: &str) -> Result<i64, DBError>;
fn expire_seconds(&self, key: &str, secs: u64) -> Result<bool, DBError>;
fn pexpire_millis(&self, key: &str, ms: u128) -> Result<bool, DBError>;
fn persist(&self, key: &str) -> Result<bool, DBError>;
fn expire_at_seconds(&self, key: &str, ts_secs: i64) -> Result<bool, DBError>;
fn pexpire_at_millis(&self, key: &str, ts_ms: i64) -> Result<bool, DBError>;
// Metadata
fn is_encrypted(&self) -> bool;
fn info(&self) -> Result<Vec<(String, String)>, DBError>;
// Clone to Arc for sharing
fn clone_arc(&self) -> Arc<dyn StorageBackend>;
}

123
src/sym.rs Normal file
View File

@@ -0,0 +1,123 @@
//! sym.rs — Stateless symmetric encryption (Phase 1)
//!
//! Commands implemented (RESP):
//! - SYM KEYGEN
//! - SYM ENCRYPT <key_b64> <message>
//! - SYM DECRYPT <key_b64> <ciphertext_b64>
//!
//! Notes:
//! - Raw key: exactly 32 bytes, provided as Base64 in commands.
//! - Cipher: XChaCha20-Poly1305 (AEAD) without AAD in Phase 1
//! - Ciphertext binary layout: [version:1][nonce:24][ciphertext||tag]
//! - Encoding for wire I/O: Base64
use base64::{engine::general_purpose::STANDARD as B64, Engine as _};
use chacha20poly1305::{
aead::{Aead, KeyInit, OsRng},
XChaCha20Poly1305, XNonce,
};
use rand::RngCore;
use crate::protocol::Protocol;
const VERSION: u8 = 1;
const NONCE_LEN: usize = 24;
const TAG_LEN: usize = 16;
#[derive(Debug)]
pub enum SymWireError {
InvalidKey,
BadEncoding,
BadFormat,
BadVersion(u8),
Crypto,
}
impl SymWireError {
fn to_protocol(self) -> Protocol {
match self {
SymWireError::InvalidKey => Protocol::err("ERR sym: invalid key"),
SymWireError::BadEncoding => Protocol::err("ERR sym: bad encoding"),
SymWireError::BadFormat => Protocol::err("ERR sym: bad format"),
SymWireError::BadVersion(v) => Protocol::err(&format!("ERR sym: unsupported version {}", v)),
SymWireError::Crypto => Protocol::err("ERR sym: auth failed"),
}
}
}
fn decode_key_b64(s: &str) -> Result<chacha20poly1305::Key, SymWireError> {
let bytes = B64.decode(s.as_bytes()).map_err(|_| SymWireError::BadEncoding)?;
if bytes.len() != 32 {
return Err(SymWireError::InvalidKey);
}
Ok(chacha20poly1305::Key::from_slice(&bytes).to_owned())
}
fn encrypt_blob(key: &chacha20poly1305::Key, plaintext: &[u8]) -> Result<Vec<u8>, SymWireError> {
let cipher = XChaCha20Poly1305::new(key);
let mut nonce_bytes = [0u8; NONCE_LEN];
OsRng.fill_bytes(&mut nonce_bytes);
let nonce = XNonce::from_slice(&nonce_bytes);
let mut out = Vec::with_capacity(1 + NONCE_LEN + plaintext.len() + TAG_LEN);
out.push(VERSION);
out.extend_from_slice(&nonce_bytes);
let ct = cipher.encrypt(nonce, plaintext).map_err(|_| SymWireError::Crypto)?;
out.extend_from_slice(&ct);
Ok(out)
}
fn decrypt_blob(key: &chacha20poly1305::Key, blob: &[u8]) -> Result<Vec<u8>, SymWireError> {
if blob.len() < 1 + NONCE_LEN + TAG_LEN {
return Err(SymWireError::BadFormat);
}
let ver = blob[0];
if ver != VERSION {
return Err(SymWireError::BadVersion(ver));
}
let nonce = XNonce::from_slice(&blob[1..1 + NONCE_LEN]);
let ct = &blob[1 + NONCE_LEN..];
let cipher = XChaCha20Poly1305::new(key);
cipher.decrypt(nonce, ct).map_err(|_| SymWireError::Crypto)
}
// ---------- Command handlers (RESP) ----------
pub async fn cmd_sym_keygen() -> Protocol {
let mut key_bytes = [0u8; 32];
OsRng.fill_bytes(&mut key_bytes);
let key_b64 = B64.encode(key_bytes);
Protocol::BulkString(key_b64)
}
pub async fn cmd_sym_encrypt(key_b64: &str, message: &str) -> Protocol {
let key = match decode_key_b64(key_b64) {
Ok(k) => k,
Err(e) => return e.to_protocol(),
};
match encrypt_blob(&key, message.as_bytes()) {
Ok(blob) => Protocol::BulkString(B64.encode(blob)),
Err(e) => e.to_protocol(),
}
}
pub async fn cmd_sym_decrypt(key_b64: &str, ct_b64: &str) -> Protocol {
let key = match decode_key_b64(key_b64) {
Ok(k) => k,
Err(e) => return e.to_protocol(),
};
let blob = match B64.decode(ct_b64.as_bytes()) {
Ok(b) => b,
Err(_) => return SymWireError::BadEncoding.to_protocol(),
};
match decrypt_blob(&key, &blob) {
Ok(pt) => match String::from_utf8(pt) {
Ok(s) => Protocol::BulkString(s),
Err(_) => Protocol::err("ERR sym: invalid UTF-8 plaintext"),
},
Err(e) => e.to_protocol(),
}
}

667
src/tantivy_search.rs Normal file
View File

@@ -0,0 +1,667 @@
use crate::error::DBError;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use tantivy::{
collector::TopDocs,
directory::MmapDirectory,
query::{BooleanQuery, Occur, Query, QueryParser, TermQuery},
schema::{
DateOptions, Field, IndexRecordOption, NumericOptions, Schema, TextFieldIndexing, TextOptions, STORED, STRING,
},
tokenizer::TokenizerManager,
DateTime, Index, IndexReader, IndexWriter, TantivyDocument, Term,
};
use tantivy::schema::Value;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FieldDef {
Text {
stored: bool,
indexed: bool,
tokenized: bool,
fast: bool,
},
Numeric {
stored: bool,
indexed: bool,
fast: bool,
precision: NumericType,
},
Tag {
stored: bool,
separator: String,
case_sensitive: bool,
},
Geo {
stored: bool,
},
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NumericType {
I64,
U64,
F64,
Date,
}
pub struct IndexSchema {
schema: Schema,
fields: HashMap<String, (Field, FieldDef)>,
default_search_fields: Vec<Field>,
}
pub struct TantivySearch {
index: Index,
writer: Arc<RwLock<IndexWriter>>,
reader: IndexReader,
index_schema: IndexSchema,
name: String,
config: IndexConfig,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
pub language: String,
pub stopwords: Vec<String>,
pub stemming: bool,
pub max_doc_count: Option<usize>,
pub default_score: f64,
}
impl Default for IndexConfig {
fn default() -> Self {
IndexConfig {
language: "english".to_string(),
stopwords: vec![],
stemming: true,
max_doc_count: None,
default_score: 1.0,
}
}
}
impl TantivySearch {
pub fn new_with_schema(
base_path: PathBuf,
name: String,
field_definitions: Vec<(String, FieldDef)>,
config: Option<IndexConfig>,
) -> Result<Self, DBError> {
let index_path = base_path.join(&name);
std::fs::create_dir_all(&index_path)
.map_err(|e| DBError(format!("Failed to create index dir: {}", e)))?;
// Build schema from field definitions
let mut schema_builder = Schema::builder();
let mut fields = HashMap::new();
let mut default_search_fields = Vec::new();
// Always add a document ID field
let id_field = schema_builder.add_text_field("_id", STRING | STORED);
fields.insert(
"_id".to_string(),
(
id_field,
FieldDef::Text {
stored: true,
indexed: true,
tokenized: false,
fast: false,
},
),
);
// Add user-defined fields
for (field_name, field_def) in field_definitions {
let field = match &field_def {
FieldDef::Text {
stored,
indexed,
tokenized,
fast: _fast,
} => {
let mut text_options = TextOptions::default();
if *stored {
text_options = text_options.set_stored();
}
if *indexed {
let indexing_options = if *tokenized {
TextFieldIndexing::default()
.set_tokenizer("default")
.set_index_option(IndexRecordOption::WithFreqsAndPositions)
} else {
TextFieldIndexing::default()
.set_tokenizer("raw")
.set_index_option(IndexRecordOption::Basic)
};
text_options = text_options.set_indexing_options(indexing_options);
let f = schema_builder.add_text_field(&field_name, text_options);
if *tokenized {
default_search_fields.push(f);
}
f
} else {
schema_builder.add_text_field(&field_name, text_options)
}
}
FieldDef::Numeric {
stored,
indexed,
fast,
precision,
} => match precision {
NumericType::I64 => {
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_i64_field(&field_name, opts)
}
NumericType::U64 => {
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_u64_field(&field_name, opts)
}
NumericType::F64 => {
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_f64_field(&field_name, opts)
}
NumericType::Date => {
let mut opts = DateOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_date_field(&field_name, opts)
}
},
FieldDef::Tag {
stored,
separator: _,
case_sensitive: _,
} => {
let mut text_options = TextOptions::default();
if *stored {
text_options = text_options.set_stored();
}
text_options = text_options.set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("raw")
.set_index_option(IndexRecordOption::Basic),
);
schema_builder.add_text_field(&field_name, text_options)
}
FieldDef::Geo { stored } => {
// For now, store as two f64 fields for lat/lon
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
opts = opts.set_indexed().set_fast();
let lat_field =
schema_builder.add_f64_field(&format!("{}_lat", field_name), opts.clone());
let lon_field =
schema_builder.add_f64_field(&format!("{}_lon", field_name), opts);
fields.insert(
format!("{}_lat", field_name),
(
lat_field,
FieldDef::Numeric {
stored: *stored,
indexed: true,
fast: true,
precision: NumericType::F64,
},
),
);
fields.insert(
format!("{}_lon", field_name),
(
lon_field,
FieldDef::Numeric {
stored: *stored,
indexed: true,
fast: true,
precision: NumericType::F64,
},
),
);
continue; // Skip adding the geo field itself
}
};
fields.insert(field_name.clone(), (field, field_def));
}
let schema = schema_builder.build();
let index_schema = IndexSchema {
schema: schema.clone(),
fields,
default_search_fields,
};
// Create or open index
let dir = MmapDirectory::open(&index_path)
.map_err(|e| DBError(format!("Failed to open index directory: {}", e)))?;
let mut index =
Index::open_or_create(dir, schema).map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
// Configure tokenizers
let tokenizer_manager = TokenizerManager::default();
index.set_tokenizers(tokenizer_manager);
let writer = index
.writer(15_000_000)
.map_err(|e| DBError(format!("Failed to create index writer: {}", e)))?;
let reader = index
.reader()
.map_err(|e| DBError(format!("Failed to create reader: {}", e)))?;
let config = config.unwrap_or_default();
Ok(TantivySearch {
index,
writer: Arc::new(RwLock::new(writer)),
reader,
index_schema,
name,
config,
})
}
pub fn add_document_with_fields(
&self,
doc_id: &str,
fields: HashMap<String, String>,
) -> Result<(), DBError> {
let mut writer = self
.writer
.write()
.map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?;
// Delete existing document with same ID
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
writer.delete_term(Term::from_field_text(*id_field, doc_id));
}
// Create new document
let mut doc = tantivy::doc!();
// Add document ID
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
doc.add_text(*id_field, doc_id);
}
// Add other fields based on schema
for (field_name, field_value) in fields {
if let Some((field, field_def)) = self.index_schema.fields.get(&field_name) {
match field_def {
FieldDef::Text { .. } => {
doc.add_text(*field, &field_value);
}
FieldDef::Numeric { precision, .. } => match precision {
NumericType::I64 => {
if let Ok(v) = field_value.parse::<i64>() {
doc.add_i64(*field, v);
}
}
NumericType::U64 => {
if let Ok(v) = field_value.parse::<u64>() {
doc.add_u64(*field, v);
}
}
NumericType::F64 => {
if let Ok(v) = field_value.parse::<f64>() {
doc.add_f64(*field, v);
}
}
NumericType::Date => {
if let Ok(v) = field_value.parse::<i64>() {
doc.add_date(*field, DateTime::from_timestamp_millis(v));
}
}
},
FieldDef::Tag {
separator,
case_sensitive,
..
} => {
let tags = if !case_sensitive {
field_value.to_lowercase()
} else {
field_value.clone()
};
for tag in tags.split(separator.as_str()) {
doc.add_text(*field, tag.trim());
}
}
FieldDef::Geo { .. } => {
let parts: Vec<&str> = field_value.split(',').collect();
if parts.len() == 2 {
if let (Ok(lat), Ok(lon)) =
(parts[0].parse::<f64>(), parts[1].parse::<f64>())
{
if let Some((lat_field, _)) =
self.index_schema.fields.get(&format!("{}_lat", field_name))
{
doc.add_f64(*lat_field, lat);
}
if let Some((lon_field, _)) =
self.index_schema.fields.get(&format!("{}_lon", field_name))
{
doc.add_f64(*lon_field, lon);
}
}
}
}
}
}
}
writer
.add_document(doc)
.map_err(|e| DBError(format!("Failed to add document: {}", e)))?;
writer
.commit()
.map_err(|e| DBError(format!("Failed to commit: {}", e)))?;
Ok(())
}
pub fn search_with_options(
&self,
query_str: &str,
options: SearchOptions,
) -> Result<SearchResults, DBError> {
let searcher = self.reader.searcher();
// Ensure we have searchable fields
if self.index_schema.default_search_fields.is_empty() {
return Err(DBError("No searchable fields defined in schema".to_string()));
}
// Parse query based on search fields
let query_parser = QueryParser::for_index(
&self.index,
self.index_schema.default_search_fields.clone(),
);
let parsed_query = query_parser
.parse_query(query_str)
.map_err(|e| DBError(format!("Failed to parse query: {}", e)))?;
let mut clauses: Vec<(Occur, Box<dyn Query>)> = vec![(Occur::Must, parsed_query)];
// Apply filters if any
for filter in options.filters {
if let Some((field, field_def)) = self.index_schema.fields.get(&filter.field) {
match filter.filter_type {
FilterType::Equals(value) => {
match field_def {
FieldDef::Text { .. } | FieldDef::Tag { .. } => {
let term_query =
TermQuery::new(Term::from_field_text(*field, &value), IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(term_query)));
}
FieldDef::Numeric { precision, .. } => {
// Equals on numeric fields: parse to the right numeric type and use term query
match precision {
NumericType::I64 => {
if let Ok(v) = value.parse::<i64>() {
let term = Term::from_field_i64(*field, v);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
NumericType::U64 => {
if let Ok(v) = value.parse::<u64>() {
let term = Term::from_field_u64(*field, v);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
NumericType::F64 => {
if let Ok(v) = value.parse::<f64>() {
let term = Term::from_field_f64(*field, v);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
NumericType::Date => {
if let Ok(v) = value.parse::<i64>() {
let dt = DateTime::from_timestamp_millis(v);
let term = Term::from_field_date(*field, dt);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
}
}
FieldDef::Geo { .. } => {
// Geo equals isn't supported in this simplified version
}
}
}
FilterType::Range { .. } => {
// TODO: Implement numeric range queries by building a RangeQuery per type
}
FilterType::InSet(values) => {
// OR across values
let mut sub_clauses: Vec<(Occur, Box<dyn Query>)> = vec![];
for value in values {
let term_query = TermQuery::new(
Term::from_field_text(*field, &value),
IndexRecordOption::Basic,
);
sub_clauses.push((Occur::Should, Box::new(term_query)));
}
clauses.push((Occur::Must, Box::new(BooleanQuery::new(sub_clauses))));
}
}
}
}
let final_query: Box<dyn Query> = if clauses.len() == 1 {
clauses.pop().unwrap().1
} else {
Box::new(BooleanQuery::new(clauses))
};
// Execute search
let top_docs = searcher
.search(&*final_query, &TopDocs::with_limit(options.limit + options.offset))
.map_err(|e| DBError(format!("Search failed: {}", e)))?;
let total_hits = top_docs.len();
let mut documents = Vec::new();
for (score, doc_address) in top_docs.into_iter().skip(options.offset).take(options.limit) {
let retrieved_doc: TantivyDocument = searcher
.doc(doc_address)
.map_err(|e| DBError(format!("Failed to retrieve doc: {}", e)))?;
let mut doc_fields = HashMap::new();
// Extract stored fields (or synthesize)
for (field_name, (field, field_def)) in &self.index_schema.fields {
match field_def {
FieldDef::Text { stored, .. } | FieldDef::Tag { stored, .. } => {
if *stored {
if let Some(value) = retrieved_doc.get_first(*field) {
if let Some(text) = value.as_str() {
doc_fields.insert(field_name.clone(), text.to_string());
}
}
}
}
FieldDef::Numeric {
stored, precision, ..
} => {
if *stored {
let value_str = match precision {
NumericType::I64 => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_i64())
.map(|v| v.to_string()),
NumericType::U64 => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_u64())
.map(|v| v.to_string()),
NumericType::F64 => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_f64())
.map(|v| v.to_string()),
NumericType::Date => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_datetime())
.map(|v| v.into_timestamp_millis().to_string()),
};
if let Some(v) = value_str {
doc_fields.insert(field_name.clone(), v);
}
}
}
FieldDef::Geo { stored } => {
if *stored {
let lat_field = self
.index_schema
.fields
.get(&format!("{}_lat", field_name))
.unwrap()
.0;
let lon_field = self
.index_schema
.fields
.get(&format!("{}_lon", field_name))
.unwrap()
.0;
let lat = retrieved_doc.get_first(lat_field).and_then(|v| v.as_f64());
let lon = retrieved_doc.get_first(lon_field).and_then(|v| v.as_f64());
if let (Some(lat), Some(lon)) = (lat, lon) {
doc_fields.insert(field_name.clone(), format!("{},{}", lat, lon));
}
}
}
}
}
documents.push(SearchDocument {
fields: doc_fields,
score,
});
}
Ok(SearchResults {
total: total_hits,
documents,
})
}
pub fn get_info(&self) -> Result<IndexInfo, DBError> {
let searcher = self.reader.searcher();
let num_docs = searcher.num_docs();
let fields_info: Vec<FieldInfo> = self
.index_schema
.fields
.iter()
.map(|(name, (_, def))| FieldInfo {
name: name.clone(),
field_type: format!("{:?}", def),
})
.collect();
Ok(IndexInfo {
name: self.name.clone(),
num_docs,
fields: fields_info,
config: self.config.clone(),
})
}
}
#[derive(Debug, Clone)]
pub struct SearchOptions {
pub limit: usize,
pub offset: usize,
pub filters: Vec<Filter>,
pub sort_by: Option<String>,
pub return_fields: Option<Vec<String>>,
pub highlight: bool,
}
impl Default for SearchOptions {
fn default() -> Self {
SearchOptions {
limit: 10,
offset: 0,
filters: vec![],
sort_by: None,
return_fields: None,
highlight: false,
}
}
}
#[derive(Debug, Clone)]
pub struct Filter {
pub field: String,
pub filter_type: FilterType,
}
#[derive(Debug, Clone)]
pub enum FilterType {
Equals(String),
Range { min: String, max: String },
InSet(Vec<String>),
}
#[derive(Debug)]
pub struct SearchResults {
pub total: usize,
pub documents: Vec<SearchDocument>,
}
#[derive(Debug)]
pub struct SearchDocument {
pub fields: HashMap<String, String>,
pub score: f32,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct IndexInfo {
pub name: String,
pub num_docs: u64,
pub fields: Vec<FieldInfo>,
pub config: IndexConfig,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct FieldInfo {
pub name: String,
pub field_type: String,
}

View File

@@ -1,10 +1,11 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
# Test script for HeroDB - Redis-compatible database with redb backend
# This script starts the server and runs comprehensive tests
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
@@ -298,7 +299,7 @@ main() {
# Start the server
print_status "Starting HeroDB server..."
./target/release/herodb --dir "$DB_DIR" --port $PORT &
../target/release/herodb --dir "$DB_DIR" --port $PORT &
SERVER_PID=$!
# Wait for server to start
@@ -352,4 +353,4 @@ check_dependencies() {
# Run dependency check and main function
check_dependencies
main "$@"
main "$@"

View File

@@ -27,6 +27,8 @@ async fn debug_hset_simple() {
debug: false,
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let mut server = Server::new(option).await;
@@ -47,6 +49,12 @@ async fn debug_hset_simple() {
sleep(Duration::from_millis(200)).await;
let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap();
// Acquire ReadWrite permissions on this connection
let resp = send_command(
&mut stream,
"*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n",
).await;
assert!(resp.contains("OK"), "Failed SELECT handshake: {}", resp);
// Test simple HSET
println!("Testing HSET...");

View File

@@ -18,6 +18,8 @@ async fn debug_hset_return_value() {
debug: false,
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let mut server = Server::new(option).await;
@@ -39,12 +41,19 @@ async fn debug_hset_return_value() {
// Connect and test HSET
let mut stream = TcpStream::connect("127.0.0.1:16390").await.unwrap();
// Acquire ReadWrite permissions for this new connection
let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n";
stream.write_all(handshake.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
let resp = String::from_utf8_lossy(&buffer[..n]);
assert!(resp.contains("OK"), "Failed SELECT handshake: {}", resp);
// Send HSET command
let cmd = "*4\r\n$4\r\nHSET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n";
stream.write_all(cmd.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
let response = String::from_utf8_lossy(&buffer[..n]);

View File

@@ -12,7 +12,15 @@ fn get_redis_connection(port: u16) -> Connection {
match client.get_connection() {
Ok(mut conn) => {
if redis::cmd("PING").query::<String>(&mut conn).is_ok() {
return conn;
// Acquire ReadWrite permissions on this connection
let sel: RedisResult<String> = redis::cmd("SELECT")
.arg(0)
.arg("KEY")
.arg("test-admin")
.query(&mut conn);
if sel.is_ok() {
return conn;
}
}
}
Err(e) => {
@@ -78,6 +86,8 @@ fn setup_server() -> (ServerProcessGuard, u16) {
"--port",
&port.to_string(),
"--debug",
"--admin-secret",
"test-admin",
])
.spawn()
.expect("Failed to start server process");

View File

@@ -22,18 +22,30 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
debug: true,
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let server = Server::new(option).await;
(server, port)
}
// Helper function to connect to the test server
// Helper function to connect to the test server
async fn connect_to_server(port: u16) -> TcpStream {
let mut attempts = 0;
loop {
match TcpStream::connect(format!("127.0.0.1:{}", port)).await {
Ok(stream) => return stream,
Ok(mut stream) => {
// Obtain ReadWrite permissions for this connection by selecting DB 0 with admin key
let resp = send_command(
&mut stream,
"*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n",
).await;
if !resp.contains("OK") {
panic!("Failed to acquire write permissions via SELECT 0 KEY test-admin: {}", resp);
}
return stream;
}
Err(_) if attempts < 10 => {
attempts += 1;
sleep(Duration::from_millis(100)).await;

85
tests/rpc_tests.rs Normal file
View File

@@ -0,0 +1,85 @@
use herodb::rpc::{BackendType, DatabaseConfig};
use herodb::admin_meta;
use herodb::options::BackendType as OptionsBackendType;
#[tokio::test]
async fn test_rpc_server_basic() {
// This test would require starting the RPC server in a separate thread
// For now, we'll just test that the types compile correctly
// Test serialization of types
let backend = BackendType::Redb;
let config = DatabaseConfig {
name: Some("test_db".to_string()),
storage_path: Some("/tmp/test".to_string()),
max_size: Some(1024 * 1024),
redis_version: Some("7.0".to_string()),
};
let backend_json = serde_json::to_string(&backend).unwrap();
let config_json = serde_json::to_string(&config).unwrap();
assert_eq!(backend_json, "\"Redb\"");
assert!(config_json.contains("test_db"));
}
#[tokio::test]
async fn test_database_config_serialization() {
let config = DatabaseConfig {
name: Some("my_db".to_string()),
storage_path: None,
max_size: Some(1000000),
redis_version: Some("7.0".to_string()),
};
let json = serde_json::to_value(&config).unwrap();
assert_eq!(json["name"], "my_db");
assert_eq!(json["max_size"], 1000000);
assert_eq!(json["redis_version"], "7.0");
}
#[tokio::test]
async fn test_backend_type_serialization() {
// Test that both Redb and Sled backends serialize correctly
let redb_backend = BackendType::Redb;
let sled_backend = BackendType::Sled;
let redb_json = serde_json::to_string(&redb_backend).unwrap();
let sled_json = serde_json::to_string(&sled_backend).unwrap();
assert_eq!(redb_json, "\"Redb\"");
assert_eq!(sled_json, "\"Sled\"");
// Test deserialization
let redb_deserialized: BackendType = serde_json::from_str(&redb_json).unwrap();
let sled_deserialized: BackendType = serde_json::from_str(&sled_json).unwrap();
assert!(matches!(redb_deserialized, BackendType::Redb));
assert!(matches!(sled_deserialized, BackendType::Sled));
}
#[tokio::test]
async fn test_database_name_persistence() {
let base_dir = "/tmp/test_db_name_persistence";
let admin_secret = "test-admin-secret";
let backend = OptionsBackendType::Redb;
let db_id = 1;
let test_name = "test-database-name";
// Clean up any existing test data
let _ = std::fs::remove_dir_all(base_dir);
// Set the database name
admin_meta::set_database_name(base_dir, backend.clone(), admin_secret, db_id, test_name)
.expect("Failed to set database name");
// Retrieve the database name
let retrieved_name = admin_meta::get_database_name(base_dir, backend, admin_secret, db_id)
.expect("Failed to get database name");
// Verify the name matches
assert_eq!(retrieved_name, Some(test_name.to_string()));
// Clean up
let _ = std::fs::remove_dir_all(base_dir);
}

View File

@@ -24,6 +24,8 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
debug: true,
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let server = Server::new(option).await;
@@ -33,9 +35,16 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
// Helper function to send Redis command and get response
async fn send_redis_command(port: u16, command: &str) -> String {
let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap();
// Acquire ReadWrite permissions on this new connection
let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n";
stream.write_all(handshake.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let _ = stream.read(&mut buffer).await.unwrap(); // Read and ignore the OK for handshake
// Now send the intended command
stream.write_all(command.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
String::from_utf8_lossy(&buffer[..n]).to_string()
}
@@ -183,12 +192,19 @@ async fn test_transaction_operations() {
sleep(Duration::from_millis(100)).await;
// Use a single connection for the transaction
// Use a single connection for the transaction
let mut stream = TcpStream::connect(format!("127.0.0.1:{}", port)).await.unwrap();
// Acquire write permissions for this connection
let handshake = "*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n";
stream.write_all(handshake.as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
let resp = String::from_utf8_lossy(&buffer[..n]);
assert!(resp.contains("OK"));
// Test MULTI
stream.write_all("*1\r\n$5\r\nMULTI\r\n".as_bytes()).await.unwrap();
let mut buffer = [0; 1024];
let n = stream.read(&mut buffer).await.unwrap();
let response = String::from_utf8_lossy(&buffer[..n]);
assert!(response.contains("OK"));

View File

@@ -22,6 +22,8 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
debug: false,
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let server = Server::new(option).await;
@@ -37,12 +39,22 @@ async fn send_command(stream: &mut TcpStream, command: &str) -> String {
String::from_utf8_lossy(&buffer[..n]).to_string()
}
// Helper function to connect to the test server
// Helper function to connect to the test server
async fn connect_to_server(port: u16) -> TcpStream {
let mut attempts = 0;
loop {
match TcpStream::connect(format!("127.0.0.1:{}", port)).await {
Ok(stream) => return stream,
Ok(mut stream) => {
// Acquire ReadWrite permissions for this connection
let resp = send_command(
&mut stream,
"*4\r\n$6\r\nSELECT\r\n$1\r\n0\r\n$3\r\nKEY\r\n$10\r\ntest-admin\r\n",
).await;
if !resp.contains("OK") {
panic!("Failed to acquire write permissions via SELECT 0 KEY test-admin: {}", resp);
}
return stream;
}
Err(_) if attempts < 10 => {
attempts += 1;
sleep(Duration::from_millis(100)).await;
@@ -96,14 +108,21 @@ async fn test_hset_clean_db() {
sleep(Duration::from_millis(200)).await;
let mut stream = connect_to_server(port).await;
// Test HSET - should return 1 for new field
let response = send_command(&mut stream, "*4\r\n$4\r\nHSET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n").await;
// Ensure clean DB state (admin DB 0 may be shared due to global singleton)
let flush = send_command(&mut stream, "*1\r\n$7\r\nFLUSHDB\r\n").await;
assert!(flush.contains("OK"), "Failed to FLUSHDB: {}", flush);
// Test HSET - should return 1 for new field (use a unique key name to avoid collisions)
let key = "hash_clean";
let hset_cmd = format!("*4\r\n$4\r\nHSET\r\n${}\r\n{}\r\n$6\r\nfield1\r\n$6\r\nvalue1\r\n", key.len(), key);
let response = send_command(&mut stream, &hset_cmd).await;
println!("HSET response: {}", response);
assert!(response.contains("1"), "Expected HSET to return 1, got: {}", response);
// Test HGET
let response = send_command(&mut stream, "*3\r\n$4\r\nHGET\r\n$4\r\nhash\r\n$6\r\nfield1\r\n").await;
let hget_cmd = format!("*3\r\n$4\r\nHGET\r\n${}\r\n{}\r\n$6\r\nfield1\r\n", key.len(), key);
let response = send_command(&mut stream, &hget_cmd).await;
println!("HGET response: {}", response);
assert!(response.contains("value1"));
}

View File

@@ -22,6 +22,8 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
debug: false,
encrypt: false,
encryption_key: None,
backend: herodb::options::BackendType::Redb,
admin_secret: "test-admin".to_string(),
};
let server = Server::new(option).await;
@@ -60,7 +62,17 @@ async fn connect(port: u16) -> TcpStream {
let mut attempts = 0;
loop {
match TcpStream::connect(format!("127.0.0.1:{}", port)).await {
Ok(s) => return s,
Ok(mut s) => {
// Acquire ReadWrite permissions for this connection using admin DB 0
let resp = send_cmd(&mut s, &["SELECT", "0", "KEY", "test-admin"]).await;
assert_contains(&resp, "OK", "SELECT 0 KEY test-admin handshake");
// Ensure clean slate per test on DB 0
let fl = send_cmd(&mut s, &["FLUSHDB"]).await;
assert_contains(&fl, "OK", "FLUSHDB after handshake");
return s;
}
Err(_) if attempts < 30 => {
attempts += 1;
sleep(Duration::from_millis(100)).await;
@@ -245,9 +257,9 @@ async fn test_01_connection_and_info() {
let getname = send_cmd(&mut s, &["CLIENT", "GETNAME"]).await;
assert_contains(&getname, "myapp", "CLIENT GETNAME");
// SELECT db
let sel = send_cmd(&mut s, &["SELECT", "0"]).await;
assert_contains(&sel, "OK", "SELECT 0");
// SELECT db (requires key on DB 0)
let sel = send_cmd(&mut s, &["SELECT", "0", "KEY", "test-admin"]).await;
assert_contains(&sel, "OK", "SELECT 0 with key");
// QUIT should close connection after sending OK
let quit = send_cmd(&mut s, &["QUIT"]).await;
@@ -278,7 +290,11 @@ async fn test_02_strings_and_expiry() {
let ex0 = send_cmd(&mut s, &["EXISTS", "user:1"]).await;
assert_contains(&ex0, "0", "EXISTS after DEL");
// DEL non-existent should return 0
let del0 = send_cmd(&mut s, &["DEL", "user:1"]).await;
assert_contains(&del0, "0", "DEL user:1 when not exists -> 0");
// INCR behavior
let i1 = send_cmd(&mut s, &["INCR", "count"]).await;
assert_contains(&i1, "1", "INCR new key -> 1");
@@ -500,11 +516,11 @@ async fn test_07_age_stateless_suite() {
let mut s = connect(port).await;
// GENENC -> [recipient, identity]
let gen = send_cmd(&mut s, &["AGE", "GENENC"]).await;
let genenc = send_cmd(&mut s, &["AGE", "GENENC"]).await;
assert!(
gen.starts_with("*2\r\n$"),
genenc.starts_with("*2\r\n$"),
"AGE GENENC should return array [recipient, identity], got:\n{}",
gen
genenc
);
// Parse simple RESP array of two bulk strings to extract keys
@@ -519,7 +535,7 @@ async fn test_07_age_stateless_suite() {
let ident = lines.next().unwrap_or("").to_string();
(recip, ident)
}
let (recipient, identity) = parse_two_bulk_array(&gen);
let (recipient, identity) = parse_two_bulk_array(&genenc);
assert!(
recipient.starts_with("age1") && identity.starts_with("AGE-SECRET-KEY-1"),
"Unexpected AGE key formats.\nrecipient: {}\nidentity: {}",
@@ -590,7 +606,7 @@ async fn test_08_age_persistent_named_suite() {
// AGE LIST
let lst = send_cmd(&mut s, &["AGE", "LIST"]).await;
assert_contains(&lst, "encpub", "AGE LIST label encpub");
// After flattening, LIST returns a flat array of managed key names
assert_contains(&lst, "app1", "AGE LIST includes app1");
}