Compare commits
15 commits: b68325016d ... management

bdf363016a, 8798bc202e, 9fa9832605, 4bb24b38dd, f3da14b957, 5ea34b4445, d9a3b711d1, d931770e90, a87ec4dbb5, a1127b72da, 3850df89be, 45195d403e, f17b441ca1, ff4ea1d844, c9e1dcdb6c
Cargo.lock (generated, 978 lines changed): diff suppressed because it is too large

Cargo.toml (37 lines changed)
@@ -1,9 +1,30 @@
[workspace]
members = ["herodb"]
resolver = "2"
[package]
name = "herodb"
version = "0.0.1"
authors = ["ThreeFold Tech NV"]
edition = "2024"

# You can define shared profiles for all workspace members here
[profile.release]
lto = true
codegen-units = 1
strip = true
[dependencies]
anyhow = "1.0.59"
bytes = "1.3.0"
thiserror = "1.0.32"
tokio = { version = "1.23.0", features = ["full"] }
clap = { version = "4.5.20", features = ["derive"] }
byteorder = "1.4.3"
futures = "0.3"
sled = "0.34"
redb = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
bincode = "1.3"
chacha20poly1305 = "0.10.1"
rand = "0.8"
sha2 = "0.10"
age = "0.10"
secrecy = "0.8"
ed25519-dalek = "2"
base64 = "0.22"
jsonrpsee = { version = "0.26.0", features = ["http-client", "ws-client", "server", "macros"] }

[dev-dependencies]
redis = { version = "0.24", features = ["aio", "tokio-comp"] }
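With the workspace layout above in place, a release build from the repository root is straightforward (sketch, assuming the single `herodb` member shown):

```bash
# Build the herodb workspace member with the shared release profile (lto, strip, codegen-units = 1)
cargo build --release -p herodb
# Resulting binary: target/release/herodb
```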
@@ -47,13 +47,13 @@ You can start HeroDB with different backends and encryption options:
#### `redb` with Encryption

```bash
-./target/release/herodb --dir /tmp/herodb_encrypted --port 6379 --encrypt --key mysecretkey
+./target/release/herodb --dir /tmp/herodb_encrypted --port 6379 --encrypt --encryption_key mysecretkey
```

#### `sled` with Encryption

```bash
-./target/release/herodb --dir /tmp/herodb_sled_encrypted --port 6379 --sled --encrypt --key mysecretkey
+./target/release/herodb --dir /tmp/herodb_sled_encrypted --port 6379 --sled --encrypt --encryption_key mysecretkey
```

## Usage with Redis Clients

@@ -70,6 +70,15 @@ MULTI/EXEC/DISCARD | ✅ | ❌ | Only supported in redb |
**Encryption** | | | |
Data-at-rest encryption | ✅ | ✅ | Both support [age](age.tech) encryption |
AGE commands | ✅ | ✅ | Both support AGE crypto commands |
**Full-Text Search** | | | |
FT.CREATE | ✅ | ✅ | Create search index with schema |
FT.ADD | ✅ | ✅ | Add document to search index |
FT.SEARCH | ✅ | ✅ | Search documents with query |
FT.DEL | ✅ | ✅ | Delete document from index |
FT.INFO | ✅ | ✅ | Get index information |
FT.DROP | ✅ | ✅ | Drop search index |
FT.ALTER | ✅ | ✅ | Alter index schema |
FT.AGGREGATE | ✅ | ✅ | Aggregate search results |

### Performance Considerations
docs/search.md (new file, 397 lines)
@@ -0,0 +1,397 @@
|
||||
# Full-Text Search with Tantivy
|
||||
|
||||
HeroDB includes powerful full-text search capabilities powered by [Tantivy](https://github.com/quickwit-oss/tantivy), a fast full-text search engine library written in Rust. This provides Redis-compatible search commands similar to RediSearch.
|
||||
|
||||
## Overview
|
||||
|
||||
The search functionality allows you to:
|
||||
- Create search indexes with custom schemas
|
||||
- Index documents with multiple field types
|
||||
- Perform complex queries with filters
|
||||
- Work with text, numeric, date, and geographic data
|
||||
- Real-time search with high performance
|
||||
|
||||
## Search Commands
|
||||
|
||||
### FT.CREATE - Create Search Index
|
||||
|
||||
Create a new search index with a defined schema.
|
||||
|
||||
```bash
|
||||
FT.CREATE index_name SCHEMA field_name field_type [options] [field_name field_type [options] ...]
|
||||
```
|
||||
|
||||
**Field Types:**
|
||||
- `TEXT` - Full-text searchable text fields
|
||||
- `NUMERIC` - Numeric fields (integers, floats)
|
||||
- `TAG` - Tag fields for exact matching
|
||||
- `GEO` - Geographic coordinates (lat,lon)
|
||||
- `DATE` - Date/timestamp fields
|
||||
|
||||
**Field Options:**
|
||||
- `STORED` - Store field value for retrieval
|
||||
- `INDEXED` - Make field searchable
|
||||
- `TOKENIZED` - Enable tokenization for text fields
|
||||
- `FAST` - Enable fast access for numeric fields
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
# Create a product search index
|
||||
FT.CREATE products SCHEMA
|
||||
title TEXT STORED INDEXED TOKENIZED
|
||||
description TEXT STORED INDEXED TOKENIZED
|
||||
price NUMERIC STORED INDEXED FAST
|
||||
category TAG STORED
|
||||
location GEO STORED
|
||||
created_date DATE STORED INDEXED
|
||||
```
|
||||
|
||||
### FT.ADD - Add Document to Index
|
||||
|
||||
Add a document to a search index.
|
||||
|
||||
```bash
|
||||
FT.ADD index_name doc_id [SCORE score] FIELDS field_name field_value [field_name field_value ...]
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
# Add a product document
|
||||
FT.ADD products product:1 SCORE 1.0 FIELDS
|
||||
title "Wireless Headphones"
|
||||
description "High-quality wireless headphones with noise cancellation"
|
||||
price 199.99
|
||||
category "electronics"
|
||||
location "37.7749,-122.4194"
|
||||
created_date 1640995200000
|
||||
```
|
||||
|
||||
### FT.SEARCH - Search Documents
|
||||
|
||||
Search for documents in an index.
|
||||
|
||||
```bash
|
||||
FT.SEARCH index_name query [LIMIT offset count] [FILTER field min max] [RETURN field [field ...]]
|
||||
```
|
||||
|
||||
**Query Syntax:**
|
||||
- Simple terms: `wireless headphones`
|
||||
- Phrase queries: `"noise cancellation"`
|
||||
- Field-specific: `title:wireless`
|
||||
- Boolean operators: `wireless AND headphones`
|
||||
- Wildcards: `head*`
|
||||
|
||||
**Examples:**
|
||||
```bash
|
||||
# Simple text search
|
||||
FT.SEARCH products "wireless headphones"
|
||||
|
||||
# Search with filters
|
||||
FT.SEARCH products "headphones" FILTER price 100 300 LIMIT 0 10
|
||||
|
||||
# Field-specific search
|
||||
FT.SEARCH products "title:wireless AND category:electronics"
|
||||
|
||||
# Return specific fields only
|
||||
FT.SEARCH products "*" RETURN title price
|
||||
```
|
||||
|
||||
### FT.DEL - Delete Document
|
||||
|
||||
Remove a document from the search index.
|
||||
|
||||
```bash
|
||||
FT.DEL index_name doc_id
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
FT.DEL products product:1
|
||||
```
|
||||
|
||||
### FT.INFO - Get Index Information
|
||||
|
||||
Get information about a search index.
|
||||
|
||||
```bash
|
||||
FT.INFO index_name
|
||||
```
|
||||
|
||||
**Returns:**
|
||||
- Index name and document count
|
||||
- Field definitions and types
|
||||
- Index configuration
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
FT.INFO products
|
||||
```
|
||||
|
||||
### FT.DROP - Drop Index
|
||||
|
||||
Delete an entire search index.
|
||||
|
||||
```bash
|
||||
FT.DROP index_name
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
FT.DROP products
|
||||
```
|
||||
|
||||
### FT.ALTER - Alter Index Schema
|
||||
|
||||
Add new fields to an existing index.
|
||||
|
||||
```bash
|
||||
FT.ALTER index_name SCHEMA ADD field_name field_type [options]
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
FT.ALTER products SCHEMA ADD brand TAG STORED
|
||||
```
|
||||
|
||||
### FT.AGGREGATE - Aggregate Search Results
|
||||
|
||||
Perform aggregations on search results.
|
||||
|
||||
```bash
|
||||
FT.AGGREGATE index_name query [GROUPBY field] [REDUCE function field AS alias]
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
# Group products by category and count
|
||||
FT.AGGREGATE products "*" GROUPBY category REDUCE COUNT 0 AS count
|
||||
```
|
||||
|
||||
## Field Types in Detail
|
||||
|
||||
### TEXT Fields
|
||||
- **Purpose**: Full-text search on natural language content
|
||||
- **Features**: Tokenization, stemming, stop-word removal
|
||||
- **Options**: `STORED`, `INDEXED`, `TOKENIZED`
|
||||
- **Example**: Product titles, descriptions, content
|
||||
|
||||
### NUMERIC Fields
|
||||
- **Purpose**: Numeric data for range queries and sorting
|
||||
- **Types**: I64, U64, F64
|
||||
- **Options**: `STORED`, `INDEXED`, `FAST`
|
||||
- **Example**: Prices, quantities, ratings
|
||||
|
||||
### TAG Fields
|
||||
- **Purpose**: Exact-match categorical data
|
||||
- **Features**: No tokenization, exact string matching
|
||||
- **Options**: `STORED`, case sensitivity control
|
||||
- **Example**: Categories, brands, status values
|
||||
|
||||
### GEO Fields
|
||||
- **Purpose**: Geographic coordinates
|
||||
- **Format**: "latitude,longitude" (e.g., "37.7749,-122.4194")
|
||||
- **Features**: Geographic distance queries
|
||||
- **Options**: `STORED`
|
||||
|
||||
### DATE Fields
|
||||
- **Purpose**: Timestamp and date data
|
||||
- **Format**: Unix timestamp in milliseconds
|
||||
- **Features**: Range queries, temporal filtering
|
||||
- **Options**: `STORED`, `INDEXED`, `FAST`
|
||||
|
||||
## Search Query Syntax
|
||||
|
||||
### Basic Queries
|
||||
```bash
|
||||
# Single term
|
||||
FT.SEARCH products "wireless"
|
||||
|
||||
# Multiple terms (AND by default)
|
||||
FT.SEARCH products "wireless headphones"
|
||||
|
||||
# Phrase query
|
||||
FT.SEARCH products "\"noise cancellation\""
|
||||
```
|
||||
|
||||
### Field-Specific Queries
|
||||
```bash
|
||||
# Search in specific field
|
||||
FT.SEARCH products "title:wireless"
|
||||
|
||||
# Multiple field queries
|
||||
FT.SEARCH products "title:wireless AND description:bluetooth"
|
||||
```
|
||||
|
||||
### Boolean Operators
|
||||
```bash
|
||||
# AND operator
|
||||
FT.SEARCH products "wireless AND headphones"
|
||||
|
||||
# OR operator
|
||||
FT.SEARCH products "wireless OR bluetooth"
|
||||
|
||||
# NOT operator
|
||||
FT.SEARCH products "headphones NOT wired"
|
||||
```
|
||||
|
||||
### Wildcards and Fuzzy Search
|
||||
```bash
|
||||
# Wildcard search
|
||||
FT.SEARCH products "head*"
|
||||
|
||||
# Fuzzy search (approximate matching)
|
||||
FT.SEARCH products "%headphone%"
|
||||
```
|
||||
|
||||
### Range Queries
|
||||
```bash
|
||||
# Numeric range in query
|
||||
FT.SEARCH products "@price:[100 300]"
|
||||
|
||||
# Date range
|
||||
FT.SEARCH products "@created_date:[1640995200000 1672531200000]"
|
||||
```
|
||||
|
||||
## Filtering and Sorting
|
||||
|
||||
### FILTER Clause
|
||||
```bash
|
||||
# Numeric filter
|
||||
FT.SEARCH products "headphones" FILTER price 100 300
|
||||
|
||||
# Multiple filters
|
||||
FT.SEARCH products "*" FILTER price 100 500 FILTER rating 4 5
|
||||
```
|
||||
|
||||
### LIMIT Clause
|
||||
```bash
|
||||
# Pagination
|
||||
FT.SEARCH products "wireless" LIMIT 0 10 # First 10 results
|
||||
FT.SEARCH products "wireless" LIMIT 10 10 # Next 10 results
|
||||
```
|
||||
|
||||
### RETURN Clause
|
||||
```bash
|
||||
# Return specific fields
|
||||
FT.SEARCH products "*" RETURN title price
|
||||
|
||||
# Return all stored fields (default)
|
||||
FT.SEARCH products "*"
|
||||
```
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Indexing Strategy
|
||||
- Only index fields you need to search on
|
||||
- Use `FAST` option for frequently filtered numeric fields
|
||||
- Consider storage vs. search performance trade-offs
|
||||
|
||||
### Query Optimization
|
||||
- Use specific field queries when possible
|
||||
- Combine filters with text queries for better performance
|
||||
- Use pagination with LIMIT for large result sets
|
||||
|
||||
### Memory Usage
|
||||
- Tantivy indexes are memory-mapped for performance
|
||||
- Index size depends on document count and field configuration
|
||||
- Monitor disk space for index storage
|
||||
|
||||
## Integration with Redis Commands
|
||||
|
||||
Search indexes work alongside regular Redis data:
|
||||
|
||||
```bash
|
||||
# Store product data in Redis hash
|
||||
HSET product:1 title "Wireless Headphones" price "199.99"
|
||||
|
||||
# Index the same data for search
|
||||
FT.ADD products product:1 FIELDS title "Wireless Headphones" price 199.99
|
||||
|
||||
# Search returns document IDs that can be used with Redis commands
|
||||
FT.SEARCH products "wireless"
|
||||
# Returns: product:1
|
||||
|
||||
# Retrieve full data using Redis
|
||||
HGETALL product:1
|
||||
```
|
||||
|
||||
## Example Use Cases
|
||||
|
||||
### E-commerce Product Search
|
||||
```bash
|
||||
# Create product catalog index
|
||||
FT.CREATE catalog SCHEMA
|
||||
name TEXT STORED INDEXED TOKENIZED
|
||||
description TEXT INDEXED TOKENIZED
|
||||
price NUMERIC STORED INDEXED FAST
|
||||
category TAG STORED
|
||||
brand TAG STORED
|
||||
rating NUMERIC STORED FAST
|
||||
|
||||
# Add products
|
||||
FT.ADD catalog prod:1 FIELDS name "iPhone 14" price 999 category "phones" brand "apple" rating 4.5
|
||||
FT.ADD catalog prod:2 FIELDS name "Samsung Galaxy" price 899 category "phones" brand "samsung" rating 4.3
|
||||
|
||||
# Search queries
|
||||
FT.SEARCH catalog "iPhone"
|
||||
FT.SEARCH catalog "phones" FILTER price 800 1000
|
||||
FT.SEARCH catalog "@brand:apple"
|
||||
```
|
||||
|
||||
### Content Management
|
||||
```bash
|
||||
# Create content index
|
||||
FT.CREATE content SCHEMA
|
||||
title TEXT STORED INDEXED TOKENIZED
|
||||
body TEXT INDEXED TOKENIZED
|
||||
author TAG STORED
|
||||
published DATE STORED INDEXED
|
||||
tags TAG STORED
|
||||
|
||||
# Search content
|
||||
FT.SEARCH content "machine learning"
|
||||
FT.SEARCH content "@author:john AND @tags:ai"
|
||||
FT.SEARCH content "*" FILTER published 1640995200000 1672531200000
|
||||
```
|
||||
|
||||
### Geographic Search
|
||||
```bash
|
||||
# Create location-based index
|
||||
FT.CREATE places SCHEMA
|
||||
name TEXT STORED INDEXED TOKENIZED
|
||||
location GEO STORED
|
||||
type TAG STORED
|
||||
|
||||
# Add locations
|
||||
FT.ADD places place:1 FIELDS name "Golden Gate Bridge" location "37.8199,-122.4783" type "landmark"
|
||||
|
||||
# Geographic queries (future feature)
|
||||
FT.SEARCH places "@location:[37.7749 -122.4194 10 km]"
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
Common error responses:
|
||||
- `ERR index not found` - Index doesn't exist
|
||||
- `ERR field not found` - Field not defined in schema
|
||||
- `ERR invalid query syntax` - Malformed query
|
||||
- `ERR document not found` - Document ID doesn't exist
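For example, querying an index that was never created reproduces the first error (sketch; the index name here is hypothetical):

```bash
FT.SEARCH missing_index "anything"
# (error) ERR index not found
```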
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Schema Design**: Plan your schema carefully - changes require reindexing
|
||||
2. **Field Selection**: Only store and index fields you actually need
|
||||
3. **Batch Operations**: Add multiple documents efficiently (see the batching sketch after this list)
|
||||
4. **Query Testing**: Test queries for performance with realistic data
|
||||
5. **Monitoring**: Monitor index size and query performance
|
||||
6. **Backup**: Include search indexes in backup strategies
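A minimal batching sketch for point 3, assuming the `products` index from earlier and hypothetical document IDs; redis-cli executes each command read from stdin in one connection instead of spawning a process per document:

```bash
redis-cli <<'EOF'
FT.ADD products product:2 FIELDS title "USB-C Cable" price 9.99 category "electronics"
FT.ADD products product:3 FIELDS title "Laptop Stand" price 39.99 category "accessories"
FT.ADD products product:4 FIELDS title "4K Webcam" price 59.99 category "electronics"
EOF
```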
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
Planned features:
|
||||
- Geographic distance queries
|
||||
- Advanced aggregations and faceting
|
||||
- Highlighting of search results
|
||||
- Synonyms and custom analyzers
|
||||
- Real-time suggestions and autocomplete
|
||||
- Index replication and sharding
|
examples/README.md (new file, 171 lines)
@@ -0,0 +1,171 @@
|
||||
# HeroDB Tantivy Search Examples
|
||||
|
||||
This directory contains examples demonstrating HeroDB's full-text search capabilities powered by Tantivy.
|
||||
|
||||
## Tantivy Search Demo (Bash Script)
|
||||
|
||||
### Overview
|
||||
The `tantivy_search_demo.sh` script provides a comprehensive demonstration of HeroDB's search functionality using Redis commands. It showcases various search scenarios including basic text search, filtering, sorting, geographic queries, and more.
|
||||
|
||||
### Prerequisites
|
||||
1. **HeroDB Server**: The server must be running on port 6381
|
||||
2. **Redis CLI**: The `redis-cli` tool must be installed and available in your PATH
|
||||
|
||||
### Running the Demo
|
||||
|
||||
#### Step 1: Start HeroDB Server
|
||||
```bash
|
||||
# From the project root directory
|
||||
cargo run -- --port 6381
|
||||
```
|
||||
|
||||
#### Step 2: Run the Demo (in a new terminal)
|
||||
```bash
|
||||
# From the project root directory
|
||||
./examples/tantivy_search_demo.sh
|
||||
```
|
||||
|
||||
### What the Demo Covers
|
||||
|
||||
The script demonstrates 15 different search scenarios:
|
||||
|
||||
1. **Index Creation** - Creating a search index with various field types
|
||||
2. **Data Insertion** - Adding sample products to the index
|
||||
3. **Basic Text Search** - Simple keyword searches
|
||||
4. **Filtered Search** - Combining text search with category filters
|
||||
5. **Numeric Range Search** - Finding products within price ranges
|
||||
6. **Sorting Results** - Ordering results by different fields
|
||||
7. **Limited Results** - Pagination and result limiting
|
||||
8. **Complex Queries** - Multi-field searches with sorting
|
||||
9. **Geographic Search** - Location-based queries
|
||||
10. **Index Information** - Getting statistics about the search index
|
||||
11. **Search Comparison** - Tantivy vs simple pattern matching
|
||||
12. **Fuzzy Search** - Typo tolerance and approximate matching
|
||||
13. **Phrase Search** - Exact phrase matching
|
||||
14. **Boolean Queries** - AND, OR, NOT operators
|
||||
15. **Cleanup** - Removing test data
|
||||
|
||||
### Sample Data
|
||||
|
||||
The demo uses a product catalog with the following fields:
|
||||
- **title** (TEXT) - Product name with higher search weight
|
||||
- **description** (TEXT) - Detailed product description
|
||||
- **category** (TAG) - Comma-separated categories
|
||||
- **price** (NUMERIC) - Product price for range queries
|
||||
- **rating** (NUMERIC) - Customer rating for sorting
|
||||
- **location** (GEO) - Geographic coordinates for location searches
|
||||
|
||||
### Key Redis Commands Demonstrated
|
||||
|
||||
#### Index Management
|
||||
```bash
|
||||
# Create search index
|
||||
FT.CREATE product_catalog ON HASH PREFIX 1 product: SCHEMA title TEXT WEIGHT 2.0 SORTABLE description TEXT category TAG SEPARATOR , price NUMERIC SORTABLE rating NUMERIC SORTABLE location GEO
|
||||
|
||||
# Get index information
|
||||
FT.INFO product_catalog
|
||||
|
||||
# Drop index
|
||||
FT.DROPINDEX product_catalog
|
||||
```
|
||||
|
||||
#### Search Queries
|
||||
```bash
|
||||
# Basic text search
|
||||
FT.SEARCH product_catalog wireless
|
||||
|
||||
# Filtered search
|
||||
FT.SEARCH product_catalog 'organic @category:{food}'
|
||||
|
||||
# Numeric range
|
||||
FT.SEARCH product_catalog '@price:[50 150]'
|
||||
|
||||
# Sorted results
|
||||
FT.SEARCH product_catalog '@category:{electronics}' SORTBY price ASC
|
||||
|
||||
# Geographic search
|
||||
FT.SEARCH product_catalog '@location:[37.7749 -122.4194 50 km]'
|
||||
|
||||
# Boolean queries
|
||||
FT.SEARCH product_catalog 'wireless AND audio'
|
||||
FT.SEARCH product_catalog 'coffee OR tea'
|
||||
|
||||
# Phrase search
|
||||
FT.SEARCH product_catalog '"noise canceling"'
|
||||
```
|
||||
|
||||
### Interactive Features
|
||||
|
||||
The demo script includes:
|
||||
- **Colored output** for better readability
|
||||
- **Pause between steps** to review results
|
||||
- **Error handling** with clear error messages
|
||||
- **Automatic cleanup** of test data
|
||||
- **Progress indicators** showing what each step demonstrates
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
#### HeroDB Not Running
|
||||
```
|
||||
✗ HeroDB is not running on port 6381
|
||||
ℹ Please start HeroDB with: cargo run -- --port 6381
|
||||
```
|
||||
**Solution**: Start the HeroDB server in a separate terminal.
|
||||
|
||||
#### Redis CLI Not Found
|
||||
```
|
||||
redis-cli: command not found
|
||||
```
|
||||
**Solution**: Install Redis tools or use an alternative Redis client.
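For example (package names vary by platform):

```bash
# Debian/Ubuntu
sudo apt-get install redis-tools
# macOS (Homebrew)
brew install redis
```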
|
||||
|
||||
#### Connection Refused
|
||||
```
|
||||
Could not connect to Redis at localhost:6381: Connection refused
|
||||
```
|
||||
**Solution**: Ensure HeroDB is running and listening on the correct port.
|
||||
|
||||
### Manual Testing
|
||||
|
||||
You can also run individual commands manually:
|
||||
|
||||
```bash
|
||||
# Connect to HeroDB
|
||||
redis-cli -h localhost -p 6381
|
||||
|
||||
# Create a simple index
|
||||
FT.CREATE myindex ON HASH SCHEMA title TEXT description TEXT
|
||||
|
||||
# Add a document
|
||||
HSET doc:1 title "Hello World" description "This is a test document"
|
||||
|
||||
# Search
|
||||
FT.SEARCH myindex hello
|
||||
```
|
||||
|
||||
### Performance Notes
|
||||
|
||||
- **Indexing**: Documents are indexed in real-time as they're added
|
||||
- **Search Speed**: Full-text search is much faster than pattern matching on large datasets
|
||||
- **Memory Usage**: Tantivy indexes are memory-efficient and disk-backed
|
||||
- **Scalability**: Supports millions of documents with sub-second search times
|
||||
|
||||
### Advanced Features
|
||||
|
||||
The demo showcases advanced Tantivy features:
|
||||
- **Relevance Scoring** - Results ranked by relevance
|
||||
- **Fuzzy Matching** - Handles typos and approximate matches
|
||||
- **Field Weighting** - Title field has higher search weight
|
||||
- **Multi-field Search** - Search across multiple fields simultaneously
|
||||
- **Geographic Queries** - Distance-based location searches
|
||||
- **Numeric Ranges** - Efficient range queries on numeric fields
|
||||
- **Tag Filtering** - Fast categorical filtering
|
||||
|
||||
### Next Steps
|
||||
|
||||
After running the demo, explore:
|
||||
1. **Custom Schemas** - Define your own field types and configurations
|
||||
2. **Large Datasets** - Test with thousands or millions of documents
|
||||
3. **Real Applications** - Integrate search into your applications
|
||||
4. **Performance Tuning** - Optimize for your specific use case
|
||||
|
||||
For more information, see the [search documentation](../docs/search.md).
|
examples/simple_demo.sh (new file, 186 lines)
@@ -0,0 +1,186 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Simple HeroDB Demo - Basic Redis Commands
|
||||
# This script demonstrates basic Redis functionality that's currently implemented
|
||||
|
||||
set -e # Exit on any error
|
||||
|
||||
# Configuration
|
||||
REDIS_HOST="localhost"
|
||||
REDIS_PORT="6381"
|
||||
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
BLUE='\033[0;34m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Function to print colored output
|
||||
print_header() {
|
||||
echo -e "${BLUE}=== $1 ===${NC}"
|
||||
}
|
||||
|
||||
print_success() {
|
||||
echo -e "${GREEN}✓ $1${NC}"
|
||||
}
|
||||
|
||||
print_info() {
|
||||
echo -e "${YELLOW}ℹ $1${NC}"
|
||||
}
|
||||
|
||||
print_error() {
|
||||
echo -e "${RED}✗ $1${NC}"
|
||||
}
|
||||
|
||||
# Function to check if HeroDB is running
|
||||
check_herodb() {
|
||||
print_info "Checking if HeroDB is running on port $REDIS_PORT..."
|
||||
if ! $REDIS_CLI ping > /dev/null 2>&1; then
|
||||
print_error "HeroDB is not running on port $REDIS_PORT"
|
||||
print_info "Please start HeroDB with: cargo run -- --port $REDIS_PORT"
|
||||
exit 1
|
||||
fi
|
||||
print_success "HeroDB is running and responding"
|
||||
}
|
||||
|
||||
# Function to execute Redis command with error handling
|
||||
execute_cmd() {
|
||||
local cmd="$1"
|
||||
local description="$2"
|
||||
|
||||
echo -e "${YELLOW}Command:${NC} $cmd"
|
||||
# eval keeps quoted arguments inside $cmd (e.g. 'Hello HeroDB!') together as single words
if result=$(eval "$REDIS_CLI $cmd" 2>&1); then
|
||||
echo -e "${GREEN}Result:${NC} $result"
|
||||
return 0
|
||||
else
|
||||
print_error "Failed: $description"
|
||||
echo "Error: $result"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Main demo function
|
||||
main() {
|
||||
clear
|
||||
print_header "HeroDB Basic Functionality Demo"
|
||||
echo "This demo shows basic Redis commands that are currently implemented"
|
||||
echo "HeroDB runs on port $REDIS_PORT (instead of Redis default 6379)"
|
||||
echo
|
||||
|
||||
# Check if HeroDB is running
|
||||
check_herodb
|
||||
echo
|
||||
|
||||
print_header "Step 1: Basic Key-Value Operations"
|
||||
|
||||
execute_cmd "SET greeting 'Hello HeroDB!'" "Setting a simple key-value pair"
|
||||
echo
|
||||
execute_cmd "GET greeting" "Getting the value"
|
||||
echo
|
||||
execute_cmd "SET counter 42" "Setting a numeric value"
|
||||
echo
|
||||
execute_cmd "INCR counter" "Incrementing the counter"
|
||||
echo
|
||||
execute_cmd "GET counter" "Getting the incremented value"
|
||||
echo
|
||||
|
||||
print_header "Step 2: Hash Operations"
|
||||
|
||||
execute_cmd "HSET user:1 name 'John Doe' email 'john@example.com' age 30" "Setting hash fields"
|
||||
echo
|
||||
execute_cmd "HGET user:1 name" "Getting a specific field"
|
||||
echo
|
||||
execute_cmd "HGETALL user:1" "Getting all fields"
|
||||
echo
|
||||
execute_cmd "HLEN user:1" "Getting hash length"
|
||||
echo
|
||||
|
||||
print_header "Step 3: List Operations"
|
||||
|
||||
execute_cmd "LPUSH tasks 'Write code' 'Test code' 'Deploy code'" "Adding items to list"
|
||||
echo
|
||||
execute_cmd "LLEN tasks" "Getting list length"
|
||||
echo
|
||||
execute_cmd "LRANGE tasks 0 -1" "Getting all list items"
|
||||
echo
|
||||
execute_cmd "LPOP tasks" "Popping from left"
|
||||
echo
|
||||
execute_cmd "LRANGE tasks 0 -1" "Checking remaining items"
|
||||
echo
|
||||
|
||||
print_header "Step 4: Key Management"
|
||||
|
||||
execute_cmd "KEYS *" "Listing all keys"
|
||||
echo
|
||||
execute_cmd "EXISTS greeting" "Checking if key exists"
|
||||
echo
|
||||
execute_cmd "TYPE user:1" "Getting key type"
|
||||
echo
|
||||
execute_cmd "DBSIZE" "Getting database size"
|
||||
echo
|
||||
|
||||
print_header "Step 5: Expiration"
|
||||
|
||||
execute_cmd "SET temp_key 'temporary value'" "Setting temporary key"
|
||||
echo
|
||||
execute_cmd "EXPIRE temp_key 5" "Setting 5 second expiration"
|
||||
echo
|
||||
execute_cmd "TTL temp_key" "Checking time to live"
|
||||
echo
|
||||
print_info "Waiting 2 seconds..."
|
||||
sleep 2
|
||||
execute_cmd "TTL temp_key" "Checking TTL again"
|
||||
echo
|
||||
|
||||
print_header "Step 6: Multiple Operations"
|
||||
|
||||
execute_cmd "MSET key1 'value1' key2 'value2' key3 'value3'" "Setting multiple keys"
|
||||
echo
|
||||
execute_cmd "MGET key1 key2 key3" "Getting multiple values"
|
||||
echo
|
||||
execute_cmd "DEL key1 key2" "Deleting multiple keys"
|
||||
echo
|
||||
execute_cmd "EXISTS key1 key2 key3" "Checking existence of multiple keys"
|
||||
echo
|
||||
|
||||
print_header "Step 7: Search Commands (Placeholder)"
|
||||
print_info "Testing FT.CREATE command (currently returns placeholder response)"
|
||||
|
||||
execute_cmd "FT.CREATE test_index SCHEMA title TEXT description TEXT" "Creating search index"
|
||||
echo
|
||||
|
||||
print_header "Step 8: Server Information"
|
||||
|
||||
execute_cmd "INFO" "Getting server information"
|
||||
echo
|
||||
execute_cmd "CONFIG GET dir" "Getting configuration"
|
||||
echo
|
||||
|
||||
print_header "Step 9: Cleanup"
|
||||
|
||||
execute_cmd "FLUSHDB" "Clearing database"
|
||||
echo
|
||||
execute_cmd "DBSIZE" "Confirming database is empty"
|
||||
echo
|
||||
|
||||
print_header "Demo Summary"
|
||||
echo "This demonstration showed:"
|
||||
echo "• Basic key-value operations (GET, SET, INCR)"
|
||||
echo "• Hash operations (HSET, HGET, HGETALL)"
|
||||
echo "• List operations (LPUSH, LPOP, LRANGE)"
|
||||
echo "• Key management (KEYS, EXISTS, TYPE, DEL)"
|
||||
echo "• Expiration handling (EXPIRE, TTL)"
|
||||
echo "• Multiple key operations (MSET, MGET)"
|
||||
echo "• Server information commands"
|
||||
echo
|
||||
print_success "HeroDB basic functionality demo completed successfully!"
|
||||
echo
|
||||
print_info "Note: Full-text search (FT.*) commands are defined but not yet fully implemented"
|
||||
print_info "To run HeroDB server: cargo run -- --port 6381"
|
||||
print_info "To connect with redis-cli: redis-cli -h localhost -p 6381"
|
||||
}
|
||||
|
||||
# Run the demo
|
||||
main "$@"
|
@@ -1,29 +0,0 @@
|
||||
[package]
|
||||
name = "herodb"
|
||||
version = "0.0.1"
|
||||
authors = ["Pin Fang <fpfangpin@hotmail.com>"]
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.59"
|
||||
bytes = "1.3.0"
|
||||
thiserror = "1.0.32"
|
||||
tokio = { version = "1.23.0", features = ["full"] }
|
||||
clap = { version = "4.5.20", features = ["derive"] }
|
||||
byteorder = "1.4.3"
|
||||
futures = "0.3"
|
||||
sled = "0.34"
|
||||
redb = "2.1.3"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
bincode = "1.3"
|
||||
chacha20poly1305 = "0.10.1"
|
||||
rand = "0.8"
|
||||
sha2 = "0.10"
|
||||
age = "0.10"
|
||||
secrecy = "0.8"
|
||||
ed25519-dalek = "2"
|
||||
base64 = "0.22"
|
||||
|
||||
[dev-dependencies]
|
||||
redis = { version = "0.24", features = ["aio", "tokio-comp"] }
|
specs/backgroundinfo/lance.md (new file, 1251 lines): diff suppressed because it is too large
specs/backgroundinfo/lancedb.md (new file, 6847 lines): diff suppressed because it is too large
specs/lance_implementation.md (new file, 735 lines)
@@ -0,0 +1,735 @@
|
||||
Looking at your request to ensure embedding happens with an external embedding provider configured in Redis at `config:core:aiembed:url`, I'll modify the implementation to use an external embedding service instead of local models.
|
||||
|
||||
## Updated Implementation with External Embedding Provider
|
||||
|
||||
### 1. Update Dependencies in `Cargo.toml`
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
# ... existing dependencies ...
|
||||
lance = "0.18"
|
||||
lance-index = "0.18"
|
||||
lance-linalg = "0.18"
|
||||
arrow = "52"
|
||||
arrow-array = "52"
|
||||
arrow-schema = "52"
|
||||
parquet = "52"
|
||||
uuid = { version = "1.10", features = ["v4"] }
|
||||
reqwest = { version = "0.11", features = ["json"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
base64 = "0.22"
|
||||
image = "0.25"
|
||||
```
|
||||
|
||||
### 2. Create Enhanced Lance Module with External Embedding
|
||||
|
||||
Create `src/lance_store.rs`:
|
||||
|
||||
```rust
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use arrow::array::{Float32Array, StringArray, BinaryArray, ArrayRef};
|
||||
use arrow::datatypes::{DataType, Field, Schema};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use lance::dataset::{Dataset, WriteParams, WriteMode};
|
||||
use lance::index::vector::VectorIndexParams;
|
||||
use lance_index::vector::pq::PQBuildParams;
|
||||
use lance_index::vector::ivf::IvfBuildParams;
|
||||
|
||||
use serde::{Deserialize, Serialize};
use base64::Engine as _; // base64 0.22 exposes encode/decode through engine values
use futures::TryStreamExt; // brings try_collect() on Lance scan streams into scope
|
||||
use crate::error::DBError;
|
||||
use crate::cmd::Protocol;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct EmbeddingRequest {
|
||||
texts: Option<Vec<String>>,
|
||||
images: Option<Vec<String>>, // base64 encoded
|
||||
model: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct EmbeddingResponse {
|
||||
embeddings: Vec<Vec<f32>>,
|
||||
model: String,
|
||||
usage: Option<HashMap<String, u32>>,
|
||||
}
|
||||
|
||||
pub struct LanceStore {
|
||||
datasets: Arc<RwLock<HashMap<String, Arc<Dataset>>>>,
|
||||
data_dir: PathBuf,
|
||||
http_client: reqwest::Client,
|
||||
}
|
||||
|
||||
impl LanceStore {
|
||||
pub async fn new(data_dir: PathBuf) -> Result<Self, DBError> {
|
||||
// Create data directory if it doesn't exist
|
||||
std::fs::create_dir_all(&data_dir)
|
||||
.map_err(|e| DBError(format!("Failed to create Lance data directory: {}", e)))?;
|
||||
|
||||
let http_client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.build()
|
||||
.map_err(|e| DBError(format!("Failed to create HTTP client: {}", e)))?;
|
||||
|
||||
Ok(Self {
|
||||
datasets: Arc::new(RwLock::new(HashMap::new())),
|
||||
data_dir,
|
||||
http_client,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get embedding service URL from Redis config
|
||||
async fn get_embedding_url(&self, server: &crate::server::Server) -> Result<String, DBError> {
|
||||
// Get the embedding URL from Redis config
|
||||
let key = "config:core:aiembed:url";
|
||||
|
||||
// Use HGET to retrieve the URL from Redis hash
|
||||
let cmd = crate::cmd::Cmd::HGet {
|
||||
key: key.to_string(),
|
||||
field: "url".to_string(),
|
||||
};
|
||||
|
||||
// Execute command to get the config
|
||||
let result = cmd.run(server).await?;
|
||||
|
||||
match result {
|
||||
Protocol::BulkString(url) => Ok(url),
|
||||
Protocol::SimpleString(url) => Ok(url),
|
||||
Protocol::Nil => Err(DBError(
|
||||
"Embedding service URL not configured. Set it with: HSET config:core:aiembed:url url <YOUR_EMBEDDING_SERVICE_URL>".to_string()
|
||||
)),
|
||||
_ => Err(DBError("Invalid embedding URL configuration".to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Call external embedding service
|
||||
async fn call_embedding_service(
|
||||
&self,
|
||||
server: &crate::server::Server,
|
||||
texts: Option<Vec<String>>,
|
||||
images: Option<Vec<String>>,
|
||||
) -> Result<Vec<Vec<f32>>, DBError> {
|
||||
let url = self.get_embedding_url(server).await?;
|
||||
|
||||
let request = EmbeddingRequest {
|
||||
texts,
|
||||
images,
|
||||
model: None, // Let the service use its default
|
||||
};
|
||||
|
||||
let response = self.http_client
|
||||
.post(&url)
|
||||
.json(&request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to call embedding service: {}", e)))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let error_text = response.text().await.unwrap_or_default();
|
||||
return Err(DBError(format!(
|
||||
"Embedding service returned error {}: {}",
|
||||
status, error_text
|
||||
)));
|
||||
}
|
||||
|
||||
let embedding_response: EmbeddingResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to parse embedding response: {}", e)))?;
|
||||
|
||||
Ok(embedding_response.embeddings)
|
||||
}
|
||||
|
||||
pub async fn embed_text(
|
||||
&self,
|
||||
server: &crate::server::Server,
|
||||
texts: Vec<String>
|
||||
) -> Result<Vec<Vec<f32>>, DBError> {
|
||||
if texts.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
self.call_embedding_service(server, Some(texts), None).await
|
||||
}
|
||||
|
||||
pub async fn embed_image(
|
||||
&self,
|
||||
server: &crate::server::Server,
|
||||
image_bytes: Vec<u8>
|
||||
) -> Result<Vec<f32>, DBError> {
|
||||
// Convert image bytes to base64
|
||||
let base64_image = base64::engine::general_purpose::STANDARD.encode(&image_bytes);
|
||||
|
||||
let embeddings = self.call_embedding_service(
|
||||
server,
|
||||
None,
|
||||
Some(vec![base64_image])
|
||||
).await?;
|
||||
|
||||
embeddings.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| DBError("No embedding returned for image".to_string()))
|
||||
}
|
||||
|
||||
pub async fn create_dataset(
|
||||
&self,
|
||||
name: &str,
|
||||
schema: Schema,
|
||||
) -> Result<(), DBError> {
|
||||
let dataset_path = self.data_dir.join(format!("{}.lance", name));
|
||||
|
||||
// Create empty dataset with schema
|
||||
let write_params = WriteParams {
|
||||
mode: WriteMode::Create,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Create an empty RecordBatch with the schema
|
||||
let empty_batch = RecordBatch::new_empty(Arc::new(schema));
|
||||
let batches = vec![empty_batch];
|
||||
|
||||
let dataset = Dataset::write(
|
||||
batches,
|
||||
dataset_path.to_str().unwrap(),
|
||||
Some(write_params)
|
||||
).await
|
||||
.map_err(|e| DBError(format!("Failed to create dataset: {}", e)))?;
|
||||
|
||||
let mut datasets = self.datasets.write().await;
|
||||
datasets.insert(name.to_string(), Arc::new(dataset));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn write_vectors(
|
||||
&self,
|
||||
dataset_name: &str,
|
||||
vectors: Vec<Vec<f32>>,
|
||||
metadata: Option<HashMap<String, Vec<String>>>,
|
||||
) -> Result<usize, DBError> {
|
||||
let dataset_path = self.data_dir.join(format!("{}.lance", dataset_name));
|
||||
|
||||
// Open or get cached dataset
|
||||
let dataset = self.get_or_open_dataset(dataset_name).await?;
|
||||
|
||||
// Build RecordBatch
|
||||
let num_vectors = vectors.len();
|
||||
if num_vectors == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let dim = vectors.first()
|
||||
.ok_or_else(|| DBError("Empty vectors".to_string()))?
|
||||
.len();
|
||||
|
||||
// Flatten vectors
|
||||
let flat_vectors: Vec<f32> = vectors.into_iter().flatten().collect();
|
||||
let vector_array = Float32Array::from(flat_vectors);
|
||||
let vector_array = arrow::array::FixedSizeListArray::try_new_from_values(
|
||||
vector_array,
|
||||
dim as i32
|
||||
).map_err(|e| DBError(format!("Failed to create vector array: {}", e)))?;
|
||||
|
||||
let mut arrays: Vec<ArrayRef> = vec![Arc::new(vector_array)];
|
||||
let mut fields = vec![Field::new(
|
||||
"vector",
|
||||
DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)),
|
||||
dim as i32
|
||||
),
|
||||
false
|
||||
)];
|
||||
|
||||
// Add metadata columns if provided
|
||||
if let Some(metadata) = metadata {
|
||||
for (key, values) in metadata {
|
||||
if values.len() != num_vectors {
|
||||
return Err(DBError(format!(
|
||||
"Metadata field '{}' has {} values but expected {}",
|
||||
key, values.len(), num_vectors
|
||||
)));
|
||||
}
|
||||
let array = StringArray::from(values);
|
||||
arrays.push(Arc::new(array));
|
||||
fields.push(Field::new(&key, DataType::Utf8, true));
|
||||
}
|
||||
}
|
||||
|
||||
let schema = Arc::new(Schema::new(fields));
|
||||
let batch = RecordBatch::try_new(schema, arrays)
|
||||
.map_err(|e| DBError(format!("Failed to create RecordBatch: {}", e)))?;
|
||||
|
||||
// Append to dataset
|
||||
let write_params = WriteParams {
|
||||
mode: WriteMode::Append,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
Dataset::write(
|
||||
vec![batch],
|
||||
dataset_path.to_str().unwrap(),
|
||||
Some(write_params)
|
||||
).await
|
||||
.map_err(|e| DBError(format!("Failed to write to dataset: {}", e)))?;
|
||||
|
||||
// Refresh cached dataset
|
||||
let mut datasets = self.datasets.write().await;
|
||||
datasets.remove(dataset_name);
|
||||
|
||||
Ok(num_vectors)
|
||||
}
|
||||
|
||||
pub async fn search_vectors(
|
||||
&self,
|
||||
dataset_name: &str,
|
||||
query_vector: Vec<f32>,
|
||||
k: usize,
|
||||
nprobes: Option<usize>,
|
||||
refine_factor: Option<usize>,
|
||||
) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
|
||||
let dataset = self.get_or_open_dataset(dataset_name).await?;
|
||||
|
||||
// Build query
|
||||
let mut query = dataset.scan();
|
||||
query = query.nearest(
|
||||
"vector",
|
||||
&query_vector,
|
||||
k,
|
||||
).map_err(|e| DBError(format!("Failed to build search query: {}", e)))?;
|
||||
|
||||
if let Some(nprobes) = nprobes {
|
||||
query = query.nprobes(nprobes);
|
||||
}
|
||||
|
||||
if let Some(refine) = refine_factor {
|
||||
query = query.refine_factor(refine);
|
||||
}
|
||||
|
||||
// Execute search
|
||||
let results = query
|
||||
.try_into_stream()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to execute search: {}", e)))?
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to collect results: {}", e)))?;
|
||||
|
||||
// Process results
|
||||
let mut output = Vec::new();
|
||||
for batch in results {
|
||||
// Get distances
|
||||
let distances = batch
|
||||
.column_by_name("_distance")
|
||||
.ok_or_else(|| DBError("No distance column".to_string()))?
|
||||
.as_any()
|
||||
.downcast_ref::<Float32Array>()
|
||||
.ok_or_else(|| DBError("Invalid distance type".to_string()))?;
|
||||
|
||||
// Get metadata
|
||||
for i in 0..batch.num_rows() {
|
||||
let distance = distances.value(i);
|
||||
let mut metadata = HashMap::new();
|
||||
|
||||
for field in batch.schema().fields() {
|
||||
if field.name() != "vector" && field.name() != "_distance" {
|
||||
if let Some(col) = batch.column_by_name(field.name()) {
|
||||
if let Some(str_array) = col.as_any().downcast_ref::<StringArray>() {
|
||||
if !str_array.is_null(i) {
|
||||
metadata.insert(
|
||||
field.name().to_string(),
|
||||
str_array.value(i).to_string()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
output.push((distance, metadata));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
pub async fn store_multimodal(
|
||||
&self,
|
||||
server: &crate::server::Server,
|
||||
dataset_name: &str,
|
||||
text: Option<String>,
|
||||
image_bytes: Option<Vec<u8>>,
|
||||
metadata: HashMap<String, String>,
|
||||
) -> Result<String, DBError> {
|
||||
// Generate ID
|
||||
let id = uuid::Uuid::new_v4().to_string();
|
||||
|
||||
// Generate embeddings using external service
|
||||
let embedding = if let Some(text) = text.as_ref() {
|
||||
self.embed_text(server, vec![text.clone()]).await?
|
||||
.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| DBError("No embedding returned".to_string()))?
|
||||
} else if let Some(img) = image_bytes.as_ref() {
|
||||
self.embed_image(server, img.clone()).await?
|
||||
} else {
|
||||
return Err(DBError("No text or image provided".to_string()));
|
||||
};
|
||||
|
||||
// Prepare metadata
|
||||
let mut full_metadata = metadata;
|
||||
full_metadata.insert("id".to_string(), id.clone());
|
||||
if let Some(text) = text {
|
||||
full_metadata.insert("text".to_string(), text);
|
||||
}
|
||||
if let Some(img) = image_bytes {
|
||||
full_metadata.insert("image_base64".to_string(), base64::encode(img));
|
||||
}
|
||||
|
||||
// Convert metadata to column vectors
|
||||
let mut metadata_cols = HashMap::new();
|
||||
for (key, value) in full_metadata {
|
||||
metadata_cols.insert(key, vec![value]);
|
||||
}
|
||||
|
||||
// Write to dataset
|
||||
self.write_vectors(dataset_name, vec![embedding], Some(metadata_cols)).await?;
|
||||
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
pub async fn search_with_text(
|
||||
&self,
|
||||
server: &crate::server::Server,
|
||||
dataset_name: &str,
|
||||
query_text: String,
|
||||
k: usize,
|
||||
nprobes: Option<usize>,
|
||||
refine_factor: Option<usize>,
|
||||
) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
|
||||
// Embed the query text using external service
|
||||
let embeddings = self.embed_text(server, vec![query_text]).await?;
|
||||
let query_vector = embeddings.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| DBError("No embedding returned for query".to_string()))?;
|
||||
|
||||
// Search with the embedding
|
||||
self.search_vectors(dataset_name, query_vector, k, nprobes, refine_factor).await
|
||||
}
|
||||
|
||||
pub async fn create_index(
|
||||
&self,
|
||||
dataset_name: &str,
|
||||
index_type: &str,
|
||||
num_partitions: Option<usize>,
|
||||
num_sub_vectors: Option<usize>,
|
||||
) -> Result<(), DBError> {
|
||||
let dataset = self.get_or_open_dataset(dataset_name).await?;
|
||||
|
||||
let mut params = VectorIndexParams::default();
|
||||
|
||||
match index_type.to_uppercase().as_str() {
|
||||
"IVF_PQ" => {
|
||||
params.ivf = IvfBuildParams {
|
||||
num_partitions: num_partitions.unwrap_or(256),
|
||||
..Default::default()
|
||||
};
|
||||
params.pq = PQBuildParams {
|
||||
num_sub_vectors: num_sub_vectors.unwrap_or(16),
|
||||
..Default::default()
|
||||
};
|
||||
}
|
||||
_ => return Err(DBError(format!("Unsupported index type: {}", index_type))),
|
||||
}
|
||||
|
||||
dataset.create_index(
|
||||
&["vector"],
|
||||
lance::index::IndexType::Vector,
|
||||
None,
|
||||
&params,
|
||||
true
|
||||
).await
|
||||
.map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_or_open_dataset(&self, name: &str) -> Result<Arc<Dataset>, DBError> {
|
||||
let mut datasets = self.datasets.write().await;
|
||||
|
||||
if let Some(dataset) = datasets.get(name) {
|
||||
return Ok(dataset.clone());
|
||||
}
|
||||
|
||||
let dataset_path = self.data_dir.join(format!("{}.lance", name));
|
||||
if !dataset_path.exists() {
|
||||
return Err(DBError(format!("Dataset '{}' does not exist", name)));
|
||||
}
|
||||
|
||||
let dataset = Dataset::open(dataset_path.to_str().unwrap())
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to open dataset: {}", e)))?;
|
||||
|
||||
let dataset = Arc::new(dataset);
|
||||
datasets.insert(name.to_string(), dataset.clone());
|
||||
|
||||
Ok(dataset)
|
||||
}
|
||||
|
||||
pub async fn list_datasets(&self) -> Result<Vec<String>, DBError> {
|
||||
let mut datasets = Vec::new();
|
||||
|
||||
let entries = std::fs::read_dir(&self.data_dir)
|
||||
.map_err(|e| DBError(format!("Failed to read data directory: {}", e)))?;
|
||||
|
||||
for entry in entries {
|
||||
let entry = entry.map_err(|e| DBError(format!("Failed to read entry: {}", e)))?;
|
||||
let path = entry.path();
|
||||
|
||||
if path.is_dir() {
|
||||
if let Some(name) = path.file_name() {
|
||||
if let Some(name_str) = name.to_str() {
|
||||
if name_str.ends_with(".lance") {
|
||||
let dataset_name = name_str.trim_end_matches(".lance");
|
||||
datasets.push(dataset_name.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(datasets)
|
||||
}
|
||||
|
||||
pub async fn drop_dataset(&self, name: &str) -> Result<(), DBError> {
|
||||
// Remove from cache
|
||||
let mut datasets = self.datasets.write().await;
|
||||
datasets.remove(name);
|
||||
|
||||
// Delete from disk
|
||||
let dataset_path = self.data_dir.join(format!("{}.lance", name));
|
||||
if dataset_path.exists() {
|
||||
std::fs::remove_dir_all(dataset_path)
|
||||
.map_err(|e| DBError(format!("Failed to delete dataset: {}", e)))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_dataset_info(&self, name: &str) -> Result<HashMap<String, String>, DBError> {
|
||||
let dataset = self.get_or_open_dataset(name).await?;
|
||||
|
||||
let mut info = HashMap::new();
|
||||
info.insert("name".to_string(), name.to_string());
|
||||
info.insert("version".to_string(), dataset.version().to_string());
|
||||
info.insert("num_rows".to_string(), dataset.count_rows().await?.to_string());
|
||||
|
||||
// Get schema info
|
||||
let schema = dataset.schema();
|
||||
let fields: Vec<String> = schema.fields()
|
||||
.iter()
|
||||
.map(|f| format!("{}:{}", f.name(), f.data_type()))
|
||||
.collect();
|
||||
info.insert("schema".to_string(), fields.join(", "));
|
||||
|
||||
Ok(info)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Update Command Implementations
|
||||
|
||||
Update the command implementations to pass the server reference for embedding service access:
|
||||
|
||||
```rust
|
||||
// In cmd.rs, update the lance command implementations
|
||||
|
||||
async fn lance_store_cmd(
|
||||
server: &Server,
|
||||
dataset: &str,
|
||||
text: Option<String>,
|
||||
image_base64: Option<String>,
|
||||
metadata: HashMap<String, String>,
|
||||
) -> Result<Protocol, DBError> {
|
||||
let lance_store = server.lance_store()?;
|
||||
|
||||
// Decode image if provided
|
||||
let image_bytes = if let Some(b64) = image_base64 {
|
||||
Some(base64::engine::general_purpose::STANDARD.decode(b64).map_err(|e|
|
||||
DBError(format!("Invalid base64 image: {}", e)))?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Pass server reference for embedding service access
|
||||
let id = lance_store.store_multimodal(
|
||||
server, // Pass server to access Redis config
|
||||
dataset,
|
||||
text,
|
||||
image_bytes,
|
||||
metadata,
|
||||
).await?;
|
||||
|
||||
Ok(Protocol::BulkString(id))
|
||||
}
|
||||
|
||||
async fn lance_embed_text_cmd(
|
||||
server: &Server,
|
||||
texts: &[String],
|
||||
) -> Result<Protocol, DBError> {
|
||||
let lance_store = server.lance_store()?;
|
||||
|
||||
// Pass server reference for embedding service access
|
||||
let embeddings = lance_store.embed_text(server, texts.to_vec()).await?;
|
||||
|
||||
// Return as array of vectors
|
||||
let mut output = Vec::new();
|
||||
for embedding in embeddings {
|
||||
let vector_str = format!("[{}]",
|
||||
embedding.iter()
|
||||
.map(|f| f.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
output.push(Protocol::BulkString(vector_str));
|
||||
}
|
||||
|
||||
Ok(Protocol::Array(output))
|
||||
}
|
||||
|
||||
async fn lance_search_text_cmd(
|
||||
server: &Server,
|
||||
dataset: &str,
|
||||
query_text: &str,
|
||||
k: usize,
|
||||
nprobes: Option<usize>,
|
||||
refine_factor: Option<usize>,
|
||||
) -> Result<Protocol, DBError> {
|
||||
let lance_store = server.lance_store()?;
|
||||
|
||||
// Search using text query (will be embedded automatically)
|
||||
let results = lance_store.search_with_text(
|
||||
server,
|
||||
dataset,
|
||||
query_text.to_string(),
|
||||
k,
|
||||
nprobes,
|
||||
refine_factor,
|
||||
).await?;
|
||||
|
||||
// Format results
|
||||
let mut output = Vec::new();
|
||||
for (distance, metadata) in results {
|
||||
let metadata_json = serde_json::to_string(&metadata)
|
||||
.unwrap_or_else(|_| "{}".to_string());
|
||||
|
||||
output.push(Protocol::Array(vec![
|
||||
Protocol::BulkString(distance.to_string()),
|
||||
Protocol::BulkString(metadata_json),
|
||||
]));
|
||||
}
|
||||
|
||||
Ok(Protocol::Array(output))
|
||||
}
|
||||
|
||||
// Add new command for text-based search
|
||||
pub enum Cmd {
|
||||
// ... existing commands ...
|
||||
LanceSearchText {
|
||||
dataset: String,
|
||||
query_text: String,
|
||||
k: usize,
|
||||
nprobes: Option<usize>,
|
||||
refine_factor: Option<usize>,
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### 1. Configure the Embedding Service
|
||||
|
||||
First, users need to configure the embedding service URL:
|
||||
|
||||
```bash
|
||||
# Configure the embedding service endpoint
|
||||
redis-cli> HSET config:core:aiembed:url url "http://localhost:8000/embeddings"
|
||||
OK
|
||||
|
||||
# Or use a cloud service
|
||||
redis-cli> HSET config:core:aiembed:url url "https://api.openai.com/v1/embeddings"
|
||||
OK
|
||||
```
|
||||
|
||||
### 2. Use Lance Commands with Automatic External Embedding
|
||||
|
||||
```bash
|
||||
# Create a dataset
|
||||
redis-cli> LANCE.CREATE products DIM 1536 SCHEMA name:string price:float category:string
|
||||
OK
|
||||
|
||||
# Store text with automatic embedding (calls external service)
|
||||
redis-cli> LANCE.STORE products TEXT "Wireless noise-canceling headphones with 30-hour battery" name:AirPods price:299.99 category:Electronics
|
||||
"uuid-123-456"
|
||||
|
||||
# Search using text query (automatically embeds the query)
|
||||
redis-cli> LANCE.SEARCH.TEXT products "best headphones for travel" K 5
|
||||
1) "0.92"
|
||||
2) "{\"id\":\"uuid-123\",\"name\":\"AirPods\",\"price\":\"299.99\"}"
|
||||
|
||||
# Get embeddings directly
|
||||
redis-cli> LANCE.EMBED.TEXT "This text will be embedded"
|
||||
1) "[0.123, 0.456, 0.789, ...]"
|
||||
```
|
||||
|
||||
## External Embedding Service API Specification
|
||||
|
||||
The external embedding service should accept POST requests with this format:
|
||||
|
||||
```json
|
||||
// Request
|
||||
{
|
||||
"texts": ["text1", "text2"], // Optional
|
||||
"images": ["base64_img1"], // Optional
|
||||
"model": "text-embedding-ada-002" // Optional
|
||||
}
|
||||
|
||||
// Response
|
||||
{
|
||||
"embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
|
||||
"model": "text-embedding-ada-002",
|
||||
"usage": {
|
||||
"prompt_tokens": 100,
|
||||
"total_tokens": 100
|
||||
}
|
||||
}
|
||||
```
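A quick way to smoke-test a compatible endpoint with curl (sketch; assumes the service is reachable at the `http://localhost:8000/embeddings` URL used in the configuration example above):

```bash
curl -s -X POST http://localhost:8000/embeddings \
  -H 'Content-Type: application/json' \
  -d '{"texts": ["wireless headphones"]}'
# Expected response shape: {"embeddings": [[0.1, 0.2, ...]], "model": "...", "usage": {...}}
```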
|
||||
|
||||
## Error Handling
|
||||
|
||||
The implementation includes comprehensive error handling:
|
||||
|
||||
1. **Missing Configuration**: Clear error message if embedding URL not configured
|
||||
2. **Service Failures**: Graceful handling of embedding service errors
|
||||
3. **Timeout Protection**: 30-second timeout for embedding requests
|
||||
4. **Retry Logic**: Could be added for resilience
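Point 1 in practice (sketch; the exact error framing depends on the client):

```bash
# With no URL stored under config:core:aiembed:url, embedding-backed commands fail fast
redis-cli> LANCE.EMBED.TEXT "hello"
(error) Embedding service URL not configured. Set it with: HSET config:core:aiembed:url url <YOUR_EMBEDDING_SERVICE_URL>
```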
|
||||
|
||||
## Benefits of This Approach
|
||||
|
||||
1. **Flexibility**: Supports any embedding service with compatible API
|
||||
2. **Cost Control**: Use your preferred embedding provider
|
||||
3. **Scalability**: Embedding service can be scaled independently
|
||||
4. **Consistency**: All embeddings use the same configured service
|
||||
5. **Security**: API keys and endpoints stored securely in Redis
|
||||
|
||||
This implementation ensures that all embedding operations go through the external service configured in Redis, providing a clean separation between the vector database functionality and the embedding generation.
|
||||
|
||||
|
||||
TODO EXTRA:
|
||||
|
||||
- secret for the embedding service API key
|
||||
|
@@ -6,7 +6,7 @@ use futures::future::select_all;
|
||||
pub enum Cmd {
|
||||
Ping,
|
||||
Echo(String),
|
||||
Select(u64), // Changed from u16 to u64
|
||||
Select(u64, Option<String>), // db_index, optional_key
|
||||
Get(String),
|
||||
Set(String, String),
|
||||
SetPx(String, String, u128),
|
||||
@@ -98,11 +98,18 @@ impl Cmd {
|
||||
Ok((
|
||||
match cmd[0].to_lowercase().as_str() {
|
||||
"select" => {
|
||||
if cmd.len() != 2 {
|
||||
if cmd.len() < 2 || cmd.len() > 4 {
|
||||
return Err(DBError("wrong number of arguments for SELECT".to_string()));
|
||||
}
|
||||
let idx = cmd[1].parse::<u64>().map_err(|_| DBError("ERR DB index is not an integer".to_string()))?;
|
||||
Cmd::Select(idx)
|
||||
let key = if cmd.len() == 4 && cmd[2].to_lowercase() == "key" {
|
||||
Some(cmd[3].clone())
|
||||
} else if cmd.len() == 2 {
|
||||
None
|
||||
} else {
|
||||
return Err(DBError("ERR syntax error".to_string()));
|
||||
};
|
||||
Cmd::Select(idx, key)
|
||||
}
|
||||
"echo" => Cmd::Echo(cmd[1].clone()),
|
||||
"ping" => Cmd::Ping,
|
||||
@@ -642,7 +649,7 @@ impl Cmd {
|
||||
}
|
||||
|
||||
match self {
|
||||
Cmd::Select(db) => select_cmd(server, db).await,
|
||||
Cmd::Select(db, key) => select_cmd(server, db, key).await,
|
||||
Cmd::Ping => Ok(Protocol::SimpleString("PONG".to_string())),
|
||||
Cmd::Echo(s) => Ok(Protocol::BulkString(s)),
|
||||
Cmd::Get(k) => get_cmd(server, &k).await,
|
||||
@@ -736,7 +743,14 @@ impl Cmd {
|
||||
|
||||
pub fn to_protocol(self) -> Protocol {
|
||||
match self {
|
||||
Cmd::Select(db) => Protocol::Array(vec![Protocol::BulkString("select".to_string()), Protocol::BulkString(db.to_string())]),
|
||||
Cmd::Select(db, key) => {
|
||||
let mut arr = vec![Protocol::BulkString("select".to_string()), Protocol::BulkString(db.to_string())];
|
||||
if let Some(k) = key {
|
||||
arr.push(Protocol::BulkString("key".to_string()));
|
||||
arr.push(Protocol::BulkString(k));
|
||||
}
|
||||
Protocol::Array(arr)
|
||||
}
|
||||
Cmd::Ping => Protocol::Array(vec![Protocol::BulkString("ping".to_string())]),
|
||||
Cmd::Echo(s) => Protocol::Array(vec![Protocol::BulkString("echo".to_string()), Protocol::BulkString(s)]),
|
||||
Cmd::Get(k) => Protocol::Array(vec![Protocol::BulkString("get".to_string()), Protocol::BulkString(k)]),
|
||||
@@ -753,9 +767,44 @@ async fn flushdb_cmd(server: &mut Server) -> Result<Protocol, DBError> {
|
||||
}
|
||||
}
|
||||
|
||||
async fn select_cmd(server: &mut Server, db: u64) -> Result<Protocol, DBError> {
|
||||
// Test if we can access the database (this will create it if needed)
|
||||
async fn select_cmd(server: &mut Server, db: u64, key: Option<String>) -> Result<Protocol, DBError> {
|
||||
// Load database metadata
|
||||
let meta = match crate::rpc::RpcServerImpl::load_meta_static(&server.option.dir, db).await {
|
||||
Ok(m) => m,
|
||||
Err(_) => {
|
||||
// If meta doesn't exist, create default
|
||||
let default_meta = crate::rpc::DatabaseMeta {
|
||||
public: true,
|
||||
keys: std::collections::HashMap::new(),
|
||||
};
|
||||
if let Err(_) = crate::rpc::RpcServerImpl::save_meta_static(&server.option.dir, db, &default_meta).await {
|
||||
return Ok(Protocol::err("ERR failed to initialize database metadata"));
|
||||
}
|
||||
default_meta
|
||||
}
|
||||
};
|
||||
|
||||
// Check access permissions
|
||||
let permissions = if meta.public {
|
||||
// Public database - full access
|
||||
Some(crate::rpc::Permissions::ReadWrite)
|
||||
} else if let Some(key_str) = key {
|
||||
// Private database - check key
|
||||
let hash = crate::rpc::hash_key(&key_str);
|
||||
if let Some(access_key) = meta.keys.get(&hash) {
|
||||
Some(access_key.permissions.clone())
|
||||
} else {
|
||||
return Ok(Protocol::err("ERR invalid access key"));
|
||||
}
|
||||
} else {
|
||||
return Ok(Protocol::err("ERR access key required for private database"));
|
||||
};
|
||||
|
||||
// Set selected database and permissions
|
||||
server.selected_db = db;
|
||||
server.current_permissions = permissions;
|
||||
|
||||
// Test if we can access the database (this will create it if needed)
|
||||
match server.current_storage() {
|
||||
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
|
||||
Err(e) => Ok(Protocol::err(&e.0)),
|
||||
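Access resolution in `select_cmd`: a public database always grants read/write, while a private one hashes the supplied key with SHA-256 and looks it up in the metadata. The failure modes surface as the error strings above, for example:

```bash
# Missing key on a private database
redis-cli -p 6379 SELECT 11
# (error) ERR access key required for private database

# Key that is not registered for this database
redis-cli -p 6379 SELECT 11 KEY not-a-registered-key
# (error) ERR invalid access key
```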
@@ -1003,6 +1052,9 @@ async fn brpop_cmd(server: &Server, keys: &[String], timeout_secs: f64) -> Resul
|
||||
}
|
||||
|
||||
async fn lpush_cmd(server: &Server, key: &str, elements: &[String]) -> Result<Protocol, DBError> {
|
||||
if !server.has_write_permission() {
|
||||
return Ok(Protocol::err("ERR write permission denied"));
|
||||
}
|
||||
match server.current_storage()?.lpush(key, elements.to_vec()) {
|
||||
Ok(len) => {
|
||||
// Attempt to deliver to any blocked BLPOP waiters
|
||||
@@ -1134,6 +1186,9 @@ async fn type_cmd(server: &Server, k: &String) -> Result<Protocol, DBError> {
|
||||
}
|
||||
|
||||
async fn del_cmd(server: &Server, k: &str) -> Result<Protocol, DBError> {
|
||||
if !server.has_write_permission() {
|
||||
return Ok(Protocol::err("ERR write permission denied"));
|
||||
}
|
||||
server.current_storage()?.del(k.to_string())?;
|
||||
Ok(Protocol::SimpleString("1".to_string()))
|
||||
}
|
||||
@@ -1159,6 +1214,9 @@ async fn set_px_cmd(
|
||||
}
|
||||
|
||||
async fn set_cmd(server: &Server, k: &str, v: &str) -> Result<Protocol, DBError> {
|
||||
if !server.has_write_permission() {
|
||||
return Ok(Protocol::err("ERR write permission denied"));
|
||||
}
|
||||
server.current_storage()?.set(k.to_string(), v.to_string())?;
|
||||
Ok(Protocol::SimpleString("OK".to_string()))
|
||||
}
|
||||
@@ -1273,6 +1331,9 @@ async fn get_cmd(server: &Server, k: &str) -> Result<Protocol, DBError> {
|
||||
|
||||
// Hash command implementations
|
||||
async fn hset_cmd(server: &Server, key: &str, pairs: &[(String, String)]) -> Result<Protocol, DBError> {
|
||||
if !server.has_write_permission() {
|
||||
return Ok(Protocol::err("ERR write permission denied"));
|
||||
}
|
||||
let new_fields = server.current_storage()?.hset(key, pairs.to_vec())?;
|
||||
Ok(Protocol::SimpleString(new_fields.to_string()))
|
||||
}
|
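The same `has_write_permission()` guard is placed in front of DEL, SET, LPUSH and HSET, so a connection that selected a database with a read-only key can still read but is rejected on any mutation. A sketch of an interactive session (the key value is illustrative):

```bash
redis-cli -p 6379
127.0.0.1:6379> SELECT 11 KEY readers-key
OK
127.0.0.1:6379> GET foo
(nil)
127.0.0.1:6379> SET foo bar
(error) ERR write permission denied
```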
@@ -4,6 +4,8 @@ pub mod crypto;
|
||||
pub mod error;
|
||||
pub mod options;
|
||||
pub mod protocol;
|
||||
pub mod rpc;
|
||||
pub mod rpc_server;
|
||||
pub mod server;
|
||||
pub mod storage;
|
||||
pub mod storage_trait; // Add this
|
@@ -3,6 +3,7 @@
|
||||
use tokio::net::TcpListener;
|
||||
|
||||
use herodb::server;
|
||||
use herodb::rpc_server;
|
||||
|
||||
use clap::Parser;
|
||||
|
||||
@@ -31,6 +32,14 @@ struct Args {
|
||||
#[arg(long)]
|
||||
encrypt: bool,
|
||||
|
||||
/// Enable RPC management server
|
||||
#[arg(long)]
|
||||
enable_rpc: bool,
|
||||
|
||||
/// RPC server port (default: 8080)
|
||||
#[arg(long, default_value = "8080")]
|
||||
rpc_port: u16,
|
||||
|
||||
/// Use the sled backend
|
||||
#[arg(long)]
|
||||
sled: bool,
|
||||
@@ -50,7 +59,8 @@ async fn main() {
|
||||
|
||||
// new DB option
|
||||
let option = herodb::options::DBOption {
|
||||
dir: args.dir,
|
||||
dir: args.dir.clone(),
|
||||
port,
|
||||
debug: args.debug,
|
||||
encryption_key: args.encryption_key,
|
||||
encrypt: args.encrypt,
|
||||
@@ -61,12 +71,36 @@ async fn main() {
|
||||
},
|
||||
};
|
||||
|
||||
let backend = option.backend.clone();
|
||||
|
||||
// new server
|
||||
let server = server::Server::new(option).await;
|
||||
let mut server = server::Server::new(option).await;
|
||||
|
||||
// Initialize the default database storage
|
||||
let _ = server.current_storage();
|
||||
|
||||
// Add a small delay to ensure the port is ready
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
|
||||
// Start RPC server if enabled
|
||||
let rpc_handle = if args.enable_rpc {
|
||||
let rpc_addr = format!("127.0.0.1:{}", args.rpc_port).parse().unwrap();
|
||||
let base_dir = args.dir.clone();
|
||||
|
||||
match rpc_server::start_rpc_server(rpc_addr, base_dir, backend).await {
|
||||
Ok(handle) => {
|
||||
println!("RPC management server started on port {}", args.rpc_port);
|
||||
Some(handle)
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Failed to start RPC server: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// accept new connections
|
||||
loop {
|
||||
let stream = listener.accept().await;
|
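With the new arguments wired into `main`, the management endpoint is opt-in. A hedged invocation sketch, assuming clap's default kebab-case mapping of the `enable_rpc` and `rpc_port` fields shown above:

```bash
./target/release/herodb --dir /tmp/herodb --port 6379 --enable-rpc --rpc-port 8080
# RPC management server started on port 8080
```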
@@ -7,6 +7,7 @@ pub enum BackendType {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DBOption {
|
||||
pub dir: String,
|
||||
pub port: u16,
|
||||
pub debug: bool,
|
||||
pub encrypt: bool,
|
||||
pub encryption_key: Option<String>,
|
634
src/rpc.rs
Normal file
634
src/rpc.rs
Normal file
@@ -0,0 +1,634 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use jsonrpsee::{core::RpcResult, proc_macros::rpc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
use crate::server::Server;
|
||||
use crate::options::DBOption;
|
||||
|
||||
/// Database backend types
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum BackendType {
|
||||
Redb,
|
||||
Sled,
|
||||
// Future: InMemory, Custom(String)
|
||||
}
|
||||
|
||||
/// Database configuration
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DatabaseConfig {
|
||||
pub name: Option<String>,
|
||||
pub storage_path: Option<String>,
|
||||
pub max_size: Option<u64>,
|
||||
pub redis_version: Option<String>,
|
||||
}
|
||||
|
||||
/// Database information returned by metadata queries
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DatabaseInfo {
|
||||
pub id: u64,
|
||||
pub name: Option<String>,
|
||||
pub backend: BackendType,
|
||||
pub encrypted: bool,
|
||||
pub redis_version: Option<String>,
|
||||
pub storage_path: Option<String>,
|
||||
pub size_on_disk: Option<u64>,
|
||||
pub key_count: Option<u64>,
|
||||
pub created_at: u64,
|
||||
pub last_access: Option<u64>,
|
||||
}
|
||||
|
||||
/// Access permissions for database keys
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub enum Permissions {
|
||||
Read,
|
||||
ReadWrite,
|
||||
}
|
||||
|
||||
/// Access key information
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AccessKey {
|
||||
pub hash: String,
|
||||
pub permissions: Permissions,
|
||||
pub created_at: u64,
|
||||
}
|
||||
|
||||
/// Database metadata containing access keys
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DatabaseMeta {
|
||||
pub public: bool,
|
||||
pub keys: HashMap<String, AccessKey>,
|
||||
}
|
||||
|
||||
/// Access key information returned by RPC
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AccessKeyInfo {
|
||||
pub hash: String,
|
||||
pub permissions: Permissions,
|
||||
pub created_at: u64,
|
||||
}
|
||||
|
||||
/// Hash a plaintext key using SHA-256
|
||||
pub fn hash_key(key: &str) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(key.as_bytes());
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
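Plaintext keys are never persisted; only their SHA-256 hex digest is stored and reported by the key-listing RPC. The same digest can be reproduced outside HeroDB, e.g.:

```bash
# -n avoids hashing a trailing newline; the digest matches the "hash" field
# kept in the database metadata
echo -n "my-plaintext-key" | sha256sum
```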
|
||||
/// RPC trait for HeroDB management
|
||||
#[rpc(server, client, namespace = "herodb")]
|
||||
pub trait Rpc {
|
||||
/// Create a new database with specified configuration
|
||||
#[method(name = "createDatabase")]
|
||||
async fn create_database(
|
||||
&self,
|
||||
backend: BackendType,
|
||||
config: DatabaseConfig,
|
||||
encryption_key: Option<String>,
|
||||
) -> RpcResult<u64>;
|
||||
|
||||
/// Set encryption for an existing database (write-only key)
|
||||
#[method(name = "setEncryption")]
|
||||
async fn set_encryption(&self, db_id: u64, encryption_key: String) -> RpcResult<bool>;
|
||||
|
||||
/// List all managed databases
|
||||
#[method(name = "listDatabases")]
|
||||
async fn list_databases(&self) -> RpcResult<Vec<DatabaseInfo>>;
|
||||
|
||||
/// Get detailed information about a specific database
|
||||
#[method(name = "getDatabaseInfo")]
|
||||
async fn get_database_info(&self, db_id: u64) -> RpcResult<DatabaseInfo>;
|
||||
|
||||
/// Delete a database
|
||||
#[method(name = "deleteDatabase")]
|
||||
async fn delete_database(&self, db_id: u64) -> RpcResult<bool>;
|
||||
|
||||
/// Get server statistics
|
||||
#[method(name = "getServerStats")]
|
||||
async fn get_server_stats(&self) -> RpcResult<HashMap<String, serde_json::Value>>;
|
||||
|
||||
/// Add an access key to a database
|
||||
#[method(name = "addAccessKey")]
|
||||
async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool>;
|
||||
|
||||
/// Delete an access key from a database
|
||||
#[method(name = "deleteAccessKey")]
|
||||
async fn delete_access_key(&self, db_id: u64, key_hash: String) -> RpcResult<bool>;
|
||||
|
||||
/// List all access keys for a database
|
||||
#[method(name = "listAccessKeys")]
|
||||
async fn list_access_keys(&self, db_id: u64) -> RpcResult<Vec<AccessKeyInfo>>;
|
||||
|
||||
/// Set database public/private status
|
||||
#[method(name = "setDatabasePublic")]
|
||||
async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool>;
|
||||
}
|
||||
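Over HTTP the methods are exposed under the `herodb` namespace; assuming jsonrpsee's usual `namespace_method` naming and positional parameters, creating an unencrypted redb database looks roughly like this (address and values illustrative):

```bash
curl -s http://127.0.0.1:8080 \
  -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":1,"method":"herodb_createDatabase",
       "params":["Redb",
                 {"name":"demo","storage_path":null,"max_size":null,"redis_version":null},
                 null]}'
# {"jsonrpc":"2.0","id":1,"result":0}   <- id of the new database
```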
|
||||
/// RPC Server implementation
|
||||
pub struct RpcServerImpl {
|
||||
/// Base directory for database files
|
||||
base_dir: String,
|
||||
/// Managed database servers
|
||||
servers: Arc<RwLock<HashMap<u64, Arc<Server>>>>,
|
||||
/// Next unencrypted database ID to assign
|
||||
next_unencrypted_id: Arc<RwLock<u64>>,
|
||||
/// Next encrypted database ID to assign
|
||||
next_encrypted_id: Arc<RwLock<u64>>,
|
||||
/// Default backend type
|
||||
backend: crate::options::BackendType,
|
||||
/// Encryption keys for databases
|
||||
encryption_keys: Arc<RwLock<HashMap<u64, Option<String>>>>,
|
||||
}
|
||||
|
||||
impl RpcServerImpl {
|
||||
/// Create a new RPC server instance
|
||||
pub fn new(base_dir: String, backend: crate::options::BackendType) -> Self {
|
||||
Self {
|
||||
base_dir,
|
||||
servers: Arc::new(RwLock::new(HashMap::new())),
|
||||
next_unencrypted_id: Arc::new(RwLock::new(0)),
|
||||
next_encrypted_id: Arc::new(RwLock::new(10)),
|
||||
backend,
|
||||
encryption_keys: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or create a server instance for the given database ID
|
||||
async fn get_or_create_server(&self, db_id: u64) -> Result<Arc<Server>, jsonrpsee::types::ErrorObjectOwned> {
|
||||
// Check if server already exists
|
||||
{
|
||||
let servers = self.servers.read().await;
|
||||
if let Some(server) = servers.get(&db_id) {
|
||||
return Ok(server.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Check if database file exists
|
||||
let db_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}.db", db_id));
|
||||
if !db_path.exists() {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Database {} not found", db_id),
|
||||
None::<()>
|
||||
));
|
||||
}
|
||||
|
||||
// Create server instance with default options
|
||||
let db_option = DBOption {
|
||||
dir: self.base_dir.clone(),
|
||||
port: 0, // Not used for RPC-managed databases
|
||||
debug: false,
|
||||
encryption_key: None,
|
||||
encrypt: false,
|
||||
backend: self.backend.clone(),
|
||||
};
|
||||
|
||||
let mut server = Server::new(db_option).await;
|
||||
|
||||
// Set the selected database to the db_id for proper file naming
|
||||
server.selected_db = db_id;
|
||||
|
||||
// Store the server
|
||||
let mut servers = self.servers.write().await;
|
||||
servers.insert(db_id, Arc::new(server.clone()));
|
||||
|
||||
Ok(Arc::new(server))
|
||||
}
|
||||
|
||||
/// Discover existing database files in the base directory
|
||||
async fn discover_databases(&self) -> Vec<u64> {
|
||||
let mut db_ids = Vec::new();
|
||||
|
||||
if let Ok(entries) = std::fs::read_dir(&self.base_dir) {
|
||||
for entry in entries.flatten() {
|
||||
if let Ok(file_name) = entry.file_name().into_string() {
|
||||
// Check if it's a database file (ends with .db)
|
||||
if file_name.ends_with(".db") {
|
||||
// Extract database ID from filename (e.g., "11.db" -> 11)
|
||||
if let Some(id_str) = file_name.strip_suffix(".db") {
|
||||
if let Ok(db_id) = id_str.parse::<u64>() {
|
||||
db_ids.push(db_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
db_ids
|
||||
}
|
||||
|
||||
/// Get the next available database ID
|
||||
async fn get_next_db_id(&self, is_encrypted: bool) -> u64 {
|
||||
if is_encrypted {
|
||||
let mut id = self.next_encrypted_id.write().await;
|
||||
let current_id = *id;
|
||||
*id += 1;
|
||||
current_id
|
||||
} else {
|
||||
let mut id = self.next_unencrypted_id.write().await;
|
||||
let current_id = *id;
|
||||
*id += 1;
|
||||
current_id
|
||||
}
|
||||
}
|
||||
|
||||
/// Load database metadata from file (static version)
|
||||
pub async fn load_meta_static(base_dir: &str, db_id: u64) -> Result<DatabaseMeta, jsonrpsee::types::ErrorObjectOwned> {
|
||||
let meta_path = std::path::PathBuf::from(base_dir).join(format!("{}_meta.json", db_id));
|
||||
|
||||
// If meta file doesn't exist, return default
|
||||
if !meta_path.exists() {
|
||||
return Ok(DatabaseMeta {
|
||||
public: true,
|
||||
keys: HashMap::new(),
|
||||
});
|
||||
}
|
||||
|
||||
// Read file
|
||||
let content = std::fs::read(&meta_path)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to read meta file: {}", e),
|
||||
None::<()>
|
||||
))?;
|
||||
|
||||
let json_str = String::from_utf8(content)
|
||||
.map_err(|_| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
"Invalid UTF-8 in meta file",
|
||||
None::<()>
|
||||
))?;
|
||||
|
||||
serde_json::from_str(&json_str)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to parse meta JSON: {}", e),
|
||||
None::<()>
|
||||
))
|
||||
}
|
||||
|
||||
/// Load database metadata from file
|
||||
async fn load_meta(&self, db_id: u64) -> Result<DatabaseMeta, jsonrpsee::types::ErrorObjectOwned> {
|
||||
let meta_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}_meta.json", db_id));
|
||||
|
||||
// If meta file doesn't exist, create default
|
||||
if !meta_path.exists() {
|
||||
let default_meta = DatabaseMeta {
|
||||
public: true,
|
||||
keys: HashMap::new(),
|
||||
};
|
||||
self.save_meta(db_id, &default_meta).await?;
|
||||
return Ok(default_meta);
|
||||
}
|
||||
|
||||
// Read and potentially decrypt
|
||||
let content = std::fs::read(&meta_path)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to read meta file: {}", e),
|
||||
None::<()>
|
||||
))?;
|
||||
|
||||
let json_str = if db_id >= 10 {
|
||||
// Encrypted database, decrypt meta
|
||||
if let Some(key) = self.encryption_keys.read().await.get(&db_id).and_then(|k| k.as_ref()) {
|
||||
use crate::crypto::CryptoFactory;
|
||||
let crypto = CryptoFactory::new(key.as_bytes());
|
||||
String::from_utf8(crypto.decrypt(&content)
|
||||
.map_err(|_| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
"Failed to decrypt meta file",
|
||||
None::<()>
|
||||
))?)
|
||||
.map_err(|_| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
"Invalid UTF-8 in decrypted meta",
|
||||
None::<()>
|
||||
))?
|
||||
} else {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
"Encryption key not found for encrypted database",
|
||||
None::<()>
|
||||
));
|
||||
}
|
||||
} else {
|
||||
String::from_utf8(content)
|
||||
.map_err(|_| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
"Invalid UTF-8 in meta file",
|
||||
None::<()>
|
||||
))?
|
||||
};
|
||||
|
||||
serde_json::from_str(&json_str)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to parse meta JSON: {}", e),
|
||||
None::<()>
|
||||
))
|
||||
}
|
||||
|
||||
/// Save database metadata to file (static version)
|
||||
pub async fn save_meta_static(base_dir: &str, db_id: u64, meta: &DatabaseMeta) -> Result<(), jsonrpsee::types::ErrorObjectOwned> {
|
||||
let meta_path = std::path::PathBuf::from(base_dir).join(format!("{}_meta.json", db_id));
|
||||
|
||||
let json_str = serde_json::to_string(meta)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to serialize meta: {}", e),
|
||||
None::<()>
|
||||
))?;
|
||||
|
||||
std::fs::write(&meta_path, json_str)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to write meta file: {}", e),
|
||||
None::<()>
|
||||
))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Save database metadata to file
|
||||
async fn save_meta(&self, db_id: u64, meta: &DatabaseMeta) -> Result<(), jsonrpsee::types::ErrorObjectOwned> {
|
||||
let meta_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}_meta.json", db_id));
|
||||
|
||||
let json_str = serde_json::to_string(meta)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to serialize meta: {}", e),
|
||||
None::<()>
|
||||
))?;
|
||||
|
||||
if db_id >= 10 {
|
||||
// Encrypted database, encrypt meta
|
||||
if let Some(key) = self.encryption_keys.read().await.get(&db_id).and_then(|k| k.as_ref()) {
|
||||
use crate::crypto::CryptoFactory;
|
||||
let crypto = CryptoFactory::new(key.as_bytes());
|
||||
let encrypted = crypto.encrypt(json_str.as_bytes());
|
||||
std::fs::write(&meta_path, encrypted)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to write encrypted meta file: {}", e),
|
||||
None::<()>
|
||||
))?;
|
||||
} else {
|
||||
return Err(jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
"Encryption key not found for encrypted database",
|
||||
None::<()>
|
||||
));
|
||||
}
|
||||
} else {
|
||||
std::fs::write(&meta_path, json_str)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to write meta file: {}", e),
|
||||
None::<()>
|
||||
))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
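For database ids below 10 the metadata sits next to the data as plain JSON in `<id>_meta.json`; ids of 10 and above reuse the same structure but the file is encrypted with the database key. A hedged sketch of the plaintext form (digest and timestamp invented for illustration):

```bash
cat /tmp/herodb/0_meta.json
# {"public":false,
#  "keys":{"e3b0c442...":{"hash":"e3b0c442...","permissions":"Read","created_at":1735000000}}}
```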
|
||||
#[jsonrpsee::core::async_trait]
|
||||
impl RpcServer for RpcServerImpl {
|
||||
async fn create_database(
|
||||
&self,
|
||||
backend: BackendType,
|
||||
config: DatabaseConfig,
|
||||
encryption_key: Option<String>,
|
||||
) -> RpcResult<u64> {
|
||||
let db_id = self.get_next_db_id(encryption_key.is_some()).await;
|
||||
|
||||
// Handle both Redb and Sled backends
|
||||
match backend {
|
||||
BackendType::Redb | BackendType::Sled => {
|
||||
// Create database directory
|
||||
let db_dir = if let Some(path) = &config.storage_path {
|
||||
std::path::PathBuf::from(path)
|
||||
} else {
|
||||
std::path::PathBuf::from(&self.base_dir).join(format!("rpc_db_{}", db_id))
|
||||
};
|
||||
|
||||
// Ensure directory exists
|
||||
std::fs::create_dir_all(&db_dir)
|
||||
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
format!("Failed to create directory: {}", e),
|
||||
None::<()>
|
||||
))?;
|
||||
|
||||
// Create DB options
|
||||
let encrypt = encryption_key.is_some();
|
||||
let option = DBOption {
|
||||
dir: db_dir.to_string_lossy().to_string(),
|
||||
port: 0, // Not used for RPC-managed databases
|
||||
debug: false,
|
||||
encryption_key: encryption_key.clone(),
|
||||
encrypt,
|
||||
backend: match backend {
|
||||
BackendType::Redb => crate::options::BackendType::Redb,
|
||||
BackendType::Sled => crate::options::BackendType::Sled,
|
||||
},
|
||||
};
|
||||
|
||||
// Create server instance
|
||||
let mut server = Server::new(option).await;
|
||||
|
||||
// Set the selected database to the db_id for proper file naming
|
||||
server.selected_db = db_id;
|
||||
|
||||
// Initialize the storage to create the database file
|
||||
let _ = server.current_storage();
|
||||
|
||||
// Store the encryption key
|
||||
{
|
||||
let mut keys = self.encryption_keys.write().await;
|
||||
keys.insert(db_id, encryption_key.clone());
|
||||
}
|
||||
|
||||
// Initialize meta file
|
||||
let meta = DatabaseMeta {
|
||||
public: true,
|
||||
keys: HashMap::new(),
|
||||
};
|
||||
self.save_meta(db_id, &meta).await?;
|
||||
|
||||
// Store the server
|
||||
let mut servers = self.servers.write().await;
|
||||
servers.insert(db_id, Arc::new(server));
|
||||
|
||||
Ok(db_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn set_encryption(&self, db_id: u64, _encryption_key: String) -> RpcResult<bool> {
|
||||
// Note: In a real implementation, we'd need to modify the existing database
|
||||
// For now, return false as encryption can only be set during creation
|
||||
let _servers = self.servers.read().await;
|
||||
// TODO: Implement encryption setting for existing databases
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
async fn list_databases(&self) -> RpcResult<Vec<DatabaseInfo>> {
|
||||
let db_ids = self.discover_databases().await;
|
||||
let mut result = Vec::new();
|
||||
|
||||
for db_id in db_ids {
|
||||
// Try to get or create server for this database
|
||||
if let Ok(server) = self.get_or_create_server(db_id).await {
|
||||
let backend = match server.option.backend {
|
||||
crate::options::BackendType::Redb => BackendType::Redb,
|
||||
crate::options::BackendType::Sled => BackendType::Sled,
|
||||
};
|
||||
|
||||
let info = DatabaseInfo {
|
||||
id: db_id,
|
||||
name: None, // TODO: Store name in server metadata
|
||||
backend,
|
||||
encrypted: server.option.encrypt,
|
||||
redis_version: Some("7.0".to_string()), // Default Redis compatibility
|
||||
storage_path: Some(server.option.dir.clone()),
|
||||
size_on_disk: None, // TODO: Calculate actual size
|
||||
key_count: None, // TODO: Get key count from storage
|
||||
created_at: std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs(),
|
||||
last_access: None,
|
||||
};
|
||||
result.push(info);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn get_database_info(&self, db_id: u64) -> RpcResult<DatabaseInfo> {
|
||||
let server = self.get_or_create_server(db_id).await?;
|
||||
|
||||
let backend = match server.option.backend {
|
||||
crate::options::BackendType::Redb => BackendType::Redb,
|
||||
crate::options::BackendType::Sled => BackendType::Sled,
|
||||
};
|
||||
|
||||
Ok(DatabaseInfo {
|
||||
id: db_id,
|
||||
name: None,
|
||||
backend,
|
||||
encrypted: server.option.encrypt,
|
||||
redis_version: Some("7.0".to_string()),
|
||||
storage_path: Some(server.option.dir.clone()),
|
||||
size_on_disk: None,
|
||||
key_count: None,
|
||||
created_at: std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs(),
|
||||
last_access: None,
|
||||
})
|
||||
}
|
||||
|
||||
async fn delete_database(&self, db_id: u64) -> RpcResult<bool> {
|
||||
let mut servers = self.servers.write().await;
|
||||
|
||||
if let Some(_server) = servers.remove(&db_id) {
|
||||
// Clean up database files
|
||||
let db_path = std::path::PathBuf::from(&self.base_dir).join(format!("{}.db", db_id));
|
||||
if db_path.exists() {
|
||||
if db_path.is_dir() {
|
||||
std::fs::remove_dir_all(&db_path).ok();
|
||||
} else {
|
||||
std::fs::remove_file(&db_path).ok();
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_server_stats(&self) -> RpcResult<HashMap<String, serde_json::Value>> {
|
||||
let db_ids = self.discover_databases().await;
|
||||
let mut stats = HashMap::new();
|
||||
|
||||
stats.insert("total_databases".to_string(), serde_json::json!(db_ids.len()));
|
||||
stats.insert("uptime".to_string(), serde_json::json!(
|
||||
std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
));
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool> {
|
||||
let mut meta = self.load_meta(db_id).await?;
|
||||
|
||||
let perms = match permissions.to_lowercase().as_str() {
|
||||
"read" => Permissions::Read,
|
||||
"readwrite" => Permissions::ReadWrite,
|
||||
_ => return Err(jsonrpsee::types::ErrorObjectOwned::owned(
|
||||
-32000,
|
||||
"Invalid permissions: use 'read' or 'readwrite'",
|
||||
None::<()>
|
||||
)),
|
||||
};
|
||||
|
||||
let hash = hash_key(&key);
|
||||
let access_key = AccessKey {
|
||||
hash: hash.clone(),
|
||||
permissions: perms,
|
||||
created_at: std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs(),
|
||||
};
|
||||
|
||||
meta.keys.insert(hash, access_key);
|
||||
self.save_meta(db_id, &meta).await?;
|
||||
Ok(true)
|
||||
}
|
||||
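Granting a key uses the same calling convention as above; `permissions` accepts `read` or `readwrite` (case-insensitive), and the server stores only the hash. For example:

```bash
curl -s http://127.0.0.1:8080 \
  -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":2,"method":"herodb_addAccessKey",
       "params":[11, "readers-key", "read"]}'
# {"jsonrpc":"2.0","id":2,"result":true}
```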
|
||||
async fn delete_access_key(&self, db_id: u64, key_hash: String) -> RpcResult<bool> {
|
||||
let mut meta = self.load_meta(db_id).await?;
|
||||
|
||||
if meta.keys.remove(&key_hash).is_some() {
|
||||
// If no keys left, make database public
|
||||
if meta.keys.is_empty() {
|
||||
meta.public = true;
|
||||
}
|
||||
self.save_meta(db_id, &meta).await?;
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_access_keys(&self, db_id: u64) -> RpcResult<Vec<AccessKeyInfo>> {
|
||||
let meta = self.load_meta(db_id).await?;
|
||||
let keys: Vec<AccessKeyInfo> = meta.keys.values()
|
||||
.map(|k| AccessKeyInfo {
|
||||
hash: k.hash.clone(),
|
||||
permissions: k.permissions.clone(),
|
||||
created_at: k.created_at,
|
||||
})
|
||||
.collect();
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool> {
|
||||
let mut meta = self.load_meta(db_id).await?;
|
||||
meta.public = public;
|
||||
self.save_meta(db_id, &meta).await?;
|
||||
Ok(true)
|
||||
}
|
||||
}
|
49
src/rpc_server.rs
Normal file
49
src/rpc_server.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use std::net::SocketAddr;
|
||||
use jsonrpsee::server::{ServerBuilder, ServerHandle};
|
||||
use jsonrpsee::RpcModule;
|
||||
|
||||
use crate::rpc::{RpcServer, RpcServerImpl};
|
||||
|
||||
/// Start the RPC server on the specified address
|
||||
pub async fn start_rpc_server(addr: SocketAddr, base_dir: String, backend: crate::options::BackendType) -> Result<ServerHandle, Box<dyn std::error::Error + Send + Sync>> {
|
||||
// Create the RPC server implementation
|
||||
let rpc_impl = RpcServerImpl::new(base_dir, backend);
|
||||
|
||||
// Create the RPC module
|
||||
let mut module = RpcModule::new(());
|
||||
module.merge(RpcServer::into_rpc(rpc_impl))?;
|
||||
|
||||
// Build the server with both HTTP and WebSocket support
|
||||
let server = ServerBuilder::default()
|
||||
.build(addr)
|
||||
.await?;
|
||||
|
||||
// Start the server
|
||||
let handle = server.start(module);
|
||||
|
||||
println!("RPC server started on {}", addr);
|
||||
|
||||
Ok(handle)
|
||||
}
|
||||
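A quick way to confirm the endpoint is up is the stats call; note that, as written, the `uptime` field reports the current Unix timestamp rather than elapsed time. A sketch (values illustrative):

```bash
curl -s http://127.0.0.1:8080 \
  -H 'Content-Type: application/json' \
  -d '{"jsonrpc":"2.0","id":3,"method":"herodb_getServerStats","params":[]}'
# {"jsonrpc":"2.0","id":3,"result":{"total_databases":1,"uptime":1735000000}}
```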
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::time::Duration;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_rpc_server_startup() {
|
||||
let addr = "127.0.0.1:0".parse().unwrap(); // Use port 0 for auto-assignment
|
||||
let base_dir = "/tmp/test_rpc".to_string();
|
||||
let backend = crate::options::BackendType::Redb; // Default for test
|
||||
|
||||
let handle = start_rpc_server(addr, base_dir, backend).await.unwrap();
|
||||
|
||||
// Give the server a moment to start
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
|
||||
// Stop the server
|
||||
handle.stop().unwrap();
|
||||
handle.stopped().await;
|
||||
}
|
||||
}
|
@@ -22,6 +22,7 @@ pub struct Server {
|
||||
pub client_name: Option<String>,
|
||||
pub selected_db: u64, // Changed from usize to u64
|
||||
pub queued_cmd: Option<Vec<(Cmd, Protocol)>>,
|
||||
pub current_permissions: Option<crate::rpc::Permissions>,
|
||||
|
||||
// BLPOP waiter registry: per (db_index, key) FIFO of waiters
|
||||
pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>,
|
||||
@@ -48,6 +49,7 @@ impl Server {
|
||||
client_name: None,
|
||||
selected_db: 0,
|
||||
queued_cmd: None,
|
||||
current_permissions: None,
|
||||
|
||||
list_waiters: Arc::new(Mutex::new(HashMap::new())),
|
||||
waiter_seq: Arc::new(AtomicU64::new(1)),
|
||||
@@ -101,6 +103,16 @@ impl Server {
|
||||
self.option.encrypt && db_index >= 10
|
||||
}
|
||||
|
||||
/// Check if current permissions allow read operations
|
||||
pub fn has_read_permission(&self) -> bool {
|
||||
matches!(self.current_permissions, Some(crate::rpc::Permissions::Read) | Some(crate::rpc::Permissions::ReadWrite))
|
||||
}
|
||||
|
||||
/// Check if current permissions allow write operations
|
||||
pub fn has_write_permission(&self) -> bool {
|
||||
matches!(self.current_permissions, Some(crate::rpc::Permissions::ReadWrite))
|
||||
}
|
||||
|
||||
// ----- BLPOP waiter helpers -----
|
||||
|
||||
pub async fn register_waiter(&self, db_index: u64, key: &str, side: PopSide) -> (u64, oneshot::Receiver<(String, String)>) {
|
@@ -27,6 +27,7 @@ async fn debug_hset_simple() {
|
||||
debug: false,
|
||||
encrypt: false,
|
||||
encryption_key: None,
|
||||
backend: herodb::options::BackendType::Redb,
|
||||
};
|
||||
|
||||
let mut server = Server::new(option).await;
|
@@ -18,6 +18,7 @@ async fn debug_hset_return_value() {
|
||||
debug: false,
|
||||
encrypt: false,
|
||||
encryption_key: None,
|
||||
backend: herodb::options::BackendType::Redb,
|
||||
};
|
||||
|
||||
let mut server = Server::new(option).await;
|
@@ -22,6 +22,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
|
||||
debug: true,
|
||||
encrypt: false,
|
||||
encryption_key: None,
|
||||
backend: herodb::options::BackendType::Redb,
|
||||
};
|
||||
|
||||
let server = Server::new(option).await;
|
62
tests/rpc_tests.rs
Normal file
62
tests/rpc_tests.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use std::net::SocketAddr;
|
||||
use jsonrpsee::http_client::HttpClientBuilder;
|
||||
use jsonrpsee::core::client::ClientT;
|
||||
use serde_json::json;
|
||||
|
||||
use herodb::rpc::{RpcClient, BackendType, DatabaseConfig};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_rpc_server_basic() {
|
||||
// This test would require starting the RPC server in a separate thread
|
||||
// For now, we'll just test that the types compile correctly
|
||||
|
||||
// Test serialization of types
|
||||
let backend = BackendType::Redb;
|
||||
let config = DatabaseConfig {
|
||||
name: Some("test_db".to_string()),
|
||||
storage_path: Some("/tmp/test".to_string()),
|
||||
max_size: Some(1024 * 1024),
|
||||
redis_version: Some("7.0".to_string()),
|
||||
};
|
||||
|
||||
let backend_json = serde_json::to_string(&backend).unwrap();
|
||||
let config_json = serde_json::to_string(&config).unwrap();
|
||||
|
||||
assert_eq!(backend_json, "\"Redb\"");
|
||||
assert!(config_json.contains("test_db"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_database_config_serialization() {
|
||||
let config = DatabaseConfig {
|
||||
name: Some("my_db".to_string()),
|
||||
storage_path: None,
|
||||
max_size: Some(1000000),
|
||||
redis_version: Some("7.0".to_string()),
|
||||
};
|
||||
|
||||
let json = serde_json::to_value(&config).unwrap();
|
||||
assert_eq!(json["name"], "my_db");
|
||||
assert_eq!(json["max_size"], 1000000);
|
||||
assert_eq!(json["redis_version"], "7.0");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_backend_type_serialization() {
|
||||
// Test that both Redb and Sled backends serialize correctly
|
||||
let redb_backend = BackendType::Redb;
|
||||
let sled_backend = BackendType::Sled;
|
||||
|
||||
let redb_json = serde_json::to_string(&redb_backend).unwrap();
|
||||
let sled_json = serde_json::to_string(&sled_backend).unwrap();
|
||||
|
||||
assert_eq!(redb_json, "\"Redb\"");
|
||||
assert_eq!(sled_json, "\"Sled\"");
|
||||
|
||||
// Test deserialization
|
||||
let redb_deserialized: BackendType = serde_json::from_str(&redb_json).unwrap();
|
||||
let sled_deserialized: BackendType = serde_json::from_str(&sled_json).unwrap();
|
||||
|
||||
assert!(matches!(redb_deserialized, BackendType::Redb));
|
||||
assert!(matches!(sled_deserialized, BackendType::Sled));
|
||||
}
|
@@ -24,6 +24,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
|
||||
debug: true,
|
||||
encrypt: false,
|
||||
encryption_key: None,
|
||||
backend: herodb::options::BackendType::Redb,
|
||||
};
|
||||
|
||||
let server = Server::new(option).await;
|
@@ -22,6 +22,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
|
||||
debug: false,
|
||||
encrypt: false,
|
||||
encryption_key: None,
|
||||
backend: herodb::options::BackendType::Redb,
|
||||
};
|
||||
|
||||
let server = Server::new(option).await;
|
@@ -22,6 +22,7 @@ async fn start_test_server(test_name: &str) -> (Server, u16) {
|
||||
debug: false,
|
||||
encrypt: false,
|
||||
encryption_key: None,
|
||||
backend: herodb::options::BackendType::Redb,
|
||||
};
|
||||
|
||||
let server = Server::new(option).await;
|
||||
@@ -500,11 +501,11 @@ async fn test_07_age_stateless_suite() {
|
||||
let mut s = connect(port).await;
|
||||
|
||||
// GENENC -> [recipient, identity]
|
||||
let gen = send_cmd(&mut s, &["AGE", "GENENC"]).await;
|
||||
let genenc = send_cmd(&mut s, &["AGE", "GENENC"]).await;
|
||||
assert!(
|
||||
gen.starts_with("*2\r\n$"),
|
||||
genenc.starts_with("*2\r\n$"),
|
||||
"AGE GENENC should return array [recipient, identity], got:\n{}",
|
||||
gen
|
||||
genenc
|
||||
);
|
||||
|
||||
// Parse simple RESP array of two bulk strings to extract keys
|
||||
@@ -519,7 +520,7 @@ async fn test_07_age_stateless_suite() {
|
||||
let ident = lines.next().unwrap_or("").to_string();
|
||||
(recip, ident)
|
||||
}
|
||||
let (recipient, identity) = parse_two_bulk_array(&gen);
|
||||
let (recipient, identity) = parse_two_bulk_array(&genenc);
|
||||
assert!(
|
||||
recipient.starts_with("age1") && identity.starts_with("AGE-SECRET-KEY-1"),
|
||||
"Unexpected AGE key formats.\nrecipient: {}\nidentity: {}",
|