Files
herodb/examples/lance_vector_demo.sh
2025-08-25 06:00:08 +02:00

426 lines
14 KiB
Bash
Executable File

#!/bin/bash
# Lance Vector Database Demo Script
# This script demonstrates all Lance vector database operations in HeroDB
set -e # Exit on any error
# Configuration
REDIS_HOST="localhost"
REDIS_PORT="6379"
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Helper functions
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
log_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
execute_command() {
local cmd="$1"
local description="$2"
echo
log_info "Executing: $description"
echo "Command: $cmd"
if result=$($cmd 2>&1); then
log_success "Result: $result"
else
log_error "Failed: $result"
return 1
fi
}
# Check if HeroDB is running
check_herodb() {
log_info "Checking if HeroDB is running..."
if ! $REDIS_CLI ping > /dev/null 2>&1; then
log_error "HeroDB is not running. Please start it first:"
echo " cargo run -- --dir ./test_data --port $REDIS_PORT"
exit 1
fi
log_success "HeroDB is running"
}
# Setup embedding service configuration
setup_embedding_service() {
log_info "Setting up embedding service configuration..."
# Note: This is a mock URL for demonstration
# In production, replace with your actual embedding service
execute_command \
"$REDIS_CLI HSET config:core:aiembed url 'http://localhost:8080/embed'" \
"Configure embedding service URL"
# Optional: Set authentication token
# execute_command \
# "$REDIS_CLI HSET config:core:aiembed token 'your-api-token'" \
# "Configure embedding service token"
log_warning "Note: Embedding service at http://localhost:8080/embed is not running."
log_warning "Some operations will fail, but this demonstrates the command structure."
}
# Dataset Management Operations
demo_dataset_management() {
echo
echo "=========================================="
echo " DATASET MANAGEMENT DEMO"
echo "=========================================="
# List datasets (should be empty initially)
execute_command \
"$REDIS_CLI LANCE LIST" \
"List all datasets (initially empty)"
# Create a simple dataset
execute_command \
"$REDIS_CLI LANCE CREATE documents DIM 384" \
"Create a simple document dataset with 384 dimensions"
# Create a dataset with schema
execute_command \
"$REDIS_CLI LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool description:string" \
"Create products dataset with custom schema"
# Create an image dataset
execute_command \
"$REDIS_CLI LANCE CREATE images DIM 512 SCHEMA filename:string tags:string width:int height:int" \
"Create images dataset for multimodal content"
# List datasets again
execute_command \
"$REDIS_CLI LANCE LIST" \
"List all datasets (should show 3 datasets)"
# Get info about datasets
execute_command \
"$REDIS_CLI LANCE INFO documents" \
"Get information about documents dataset"
execute_command \
"$REDIS_CLI LANCE INFO products" \
"Get information about products dataset"
}
# Embedding Operations
demo_embedding_operations() {
echo
echo "=========================================="
echo " EMBEDDING OPERATIONS DEMO"
echo "=========================================="
log_warning "The following operations will fail because no embedding service is running."
log_warning "This demonstrates the command structure and error handling."
# Try to embed text (will fail without embedding service)
execute_command \
"$REDIS_CLI LANCE EMBED.TEXT 'Hello world'" \
"Generate embedding for single text" || true
# Try to embed multiple texts
execute_command \
"$REDIS_CLI LANCE EMBED.TEXT 'Machine learning' 'Artificial intelligence' 'Deep learning'" \
"Generate embeddings for multiple texts" || true
}
# Data Storage Operations
demo_data_storage() {
echo
echo "=========================================="
echo " DATA STORAGE DEMO"
echo "=========================================="
log_warning "Storage operations will fail without embedding service, but show command structure."
# Store text documents
execute_command \
"$REDIS_CLI LANCE STORE documents TEXT 'Introduction to machine learning algorithms and their applications in modern AI systems' category 'education' author 'John Doe' difficulty 'beginner'" \
"Store a document with text and metadata" || true
execute_command \
"$REDIS_CLI LANCE STORE documents TEXT 'Deep learning neural networks for computer vision tasks' category 'research' author 'Jane Smith' difficulty 'advanced'" \
"Store another document" || true
# Store product information
execute_command \
"$REDIS_CLI LANCE STORE products TEXT 'High-performance laptop with 16GB RAM and SSD storage' category 'electronics' price '1299.99' available 'true'" \
"Store product with text description" || true
# Store image with metadata (using placeholder base64)
execute_command \
"$REDIS_CLI LANCE STORE images IMAGE 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==' filename 'sample.png' tags 'test,demo' width '1' height '1'" \
"Store image with metadata (1x1 pixel PNG)" || true
# Store multimodal content
execute_command \
"$REDIS_CLI LANCE STORE images TEXT 'Beautiful sunset over mountains' IMAGE 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==' filename 'sunset.png' tags 'nature,landscape' location 'California'" \
"Store multimodal content (text + image)" || true
}
# Search Operations
demo_search_operations() {
echo
echo "=========================================="
echo " SEARCH OPERATIONS DEMO"
echo "=========================================="
log_warning "Search operations will fail without data, but show command structure."
# Search with raw vector
execute_command \
"$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 5" \
"Search with raw vector (5 results)" || true
# Search with vector and parameters
execute_command \
"$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 10 NPROBES 20 REFINE 2" \
"Search with vector and advanced parameters" || true
# Text-based search
execute_command \
"$REDIS_CLI LANCE SEARCH.TEXT documents 'machine learning algorithms' K 5" \
"Search using text query" || true
# Text search with parameters
execute_command \
"$REDIS_CLI LANCE SEARCH.TEXT products 'laptop computer' K 3 NPROBES 10" \
"Search products using text with parameters" || true
# Search in image dataset
execute_command \
"$REDIS_CLI LANCE SEARCH.TEXT images 'sunset landscape' K 5" \
"Search images using text description" || true
}
# Index Management Operations
demo_index_management() {
echo
echo "=========================================="
echo " INDEX MANAGEMENT DEMO"
echo "=========================================="
# Create indexes for better search performance
execute_command \
"$REDIS_CLI LANCE CREATE.INDEX documents IVF_PQ" \
"Create default IVF_PQ index for documents"
execute_command \
"$REDIS_CLI LANCE CREATE.INDEX products IVF_PQ PARTITIONS 512 SUBVECTORS 32" \
"Create IVF_PQ index with custom parameters for products"
execute_command \
"$REDIS_CLI LANCE CREATE.INDEX images IVF_PQ PARTITIONS 256 SUBVECTORS 16" \
"Create IVF_PQ index for images dataset"
log_success "Indexes created successfully"
}
# Advanced Usage Examples
demo_advanced_usage() {
echo
echo "=========================================="
echo " ADVANCED USAGE EXAMPLES"
echo "=========================================="
# Create a specialized dataset for semantic search
execute_command \
"$REDIS_CLI LANCE CREATE semantic_search DIM 1536 SCHEMA title:string content:string url:string timestamp:string source:string" \
"Create dataset for semantic search with rich metadata"
# Demonstrate batch operations concept
log_info "Batch operations example (would store multiple items):"
echo " for doc in documents:"
echo " LANCE STORE semantic_search TEXT \"\$doc_content\" title \"\$title\" url \"\$url\""
# Show monitoring commands
log_info "Monitoring and maintenance commands:"
execute_command \
"$REDIS_CLI LANCE LIST" \
"List all datasets for monitoring"
# Show dataset statistics
for dataset in documents products images semantic_search; do
execute_command \
"$REDIS_CLI LANCE INFO $dataset" \
"Get statistics for $dataset" || true
done
}
# Cleanup Operations
demo_cleanup() {
echo
echo "=========================================="
echo " CLEANUP OPERATIONS DEMO"
echo "=========================================="
log_info "Demonstrating cleanup operations..."
# Drop individual datasets
execute_command \
"$REDIS_CLI LANCE DROP semantic_search" \
"Drop semantic_search dataset"
# List remaining datasets
execute_command \
"$REDIS_CLI LANCE LIST" \
"List remaining datasets"
# Ask user if they want to clean up all test data
echo
read -p "Do you want to clean up all test datasets? (y/N): " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
execute_command \
"$REDIS_CLI LANCE DROP documents" \
"Drop documents dataset"
execute_command \
"$REDIS_CLI LANCE DROP products" \
"Drop products dataset"
execute_command \
"$REDIS_CLI LANCE DROP images" \
"Drop images dataset"
execute_command \
"$REDIS_CLI LANCE LIST" \
"Verify all datasets are cleaned up"
log_success "All test datasets cleaned up"
else
log_info "Keeping test datasets for further experimentation"
fi
}
# Error Handling Demo
demo_error_handling() {
echo
echo "=========================================="
echo " ERROR HANDLING DEMO"
echo "=========================================="
log_info "Demonstrating various error conditions..."
# Try to access non-existent dataset
execute_command \
"$REDIS_CLI LANCE INFO nonexistent_dataset" \
"Try to get info for non-existent dataset" || true
# Try to search non-existent dataset
execute_command \
"$REDIS_CLI LANCE SEARCH nonexistent_dataset VECTOR '0.1,0.2' K 5" \
"Try to search non-existent dataset" || true
# Try to drop non-existent dataset
execute_command \
"$REDIS_CLI LANCE DROP nonexistent_dataset" \
"Try to drop non-existent dataset" || true
# Try invalid vector format
execute_command \
"$REDIS_CLI LANCE SEARCH documents VECTOR 'invalid,vector,format' K 5" \
"Try search with invalid vector format" || true
log_info "Error handling demonstration complete"
}
# Performance Testing Demo
demo_performance_testing() {
echo
echo "=========================================="
echo " PERFORMANCE TESTING DEMO"
echo "=========================================="
log_info "Creating performance test dataset..."
execute_command \
"$REDIS_CLI LANCE CREATE perf_test DIM 128 SCHEMA batch_id:string item_id:string" \
"Create performance test dataset"
log_info "Performance testing would involve:"
echo " 1. Bulk loading thousands of vectors"
echo " 2. Creating indexes with different parameters"
echo " 3. Measuring search latency with various K values"
echo " 4. Testing different NPROBES settings"
echo " 5. Monitoring memory usage"
log_info "Example performance test commands:"
echo " # Test search speed with different parameters"
echo " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10"
echo " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10 NPROBES 50"
echo " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 100 NPROBES 100"
# Clean up performance test dataset
execute_command \
"$REDIS_CLI LANCE DROP perf_test" \
"Clean up performance test dataset"
}
# Main execution
main() {
echo "=========================================="
echo " LANCE VECTOR DATABASE DEMO SCRIPT"
echo "=========================================="
echo
echo "This script demonstrates all Lance vector database operations."
echo "Note: Some operations will fail without a running embedding service."
echo "This is expected and demonstrates error handling."
echo
# Check prerequisites
check_herodb
# Setup
setup_embedding_service
# Run demos
demo_dataset_management
demo_embedding_operations
demo_data_storage
demo_search_operations
demo_index_management
demo_advanced_usage
demo_error_handling
demo_performance_testing
# Cleanup
demo_cleanup
echo
echo "=========================================="
echo " DEMO COMPLETE"
echo "=========================================="
echo
log_success "Lance vector database demo completed successfully!"
echo
echo "Next steps:"
echo "1. Set up a real embedding service (OpenAI, Hugging Face, etc.)"
echo "2. Update the embedding service URL configuration"
echo "3. Try storing and searching real data"
echo "4. Experiment with different vector dimensions and index parameters"
echo "5. Build your AI-powered application!"
echo
echo "For more information, see docs/lance_vector_db.md"
}
# Run the demo
main "$@"