#!/bin/bash

# Lance Vector Database Demo Script
# This script demonstrates all Lance vector database operations in HeroDB
#
# Optional environment overrides:
#   REDIS_HOST  host the redis-cli client connects to (default: localhost)
#   REDIS_PORT  port the redis-cli client connects to (default: 6379)

set -e # Exit on any error

# Configuration (defaults apply when the variables are unset or empty)
REDIS_HOST="${REDIS_HOST:-localhost}"
REDIS_PORT="${REDIS_PORT:-6379}"
# Client command prefix; the demo functions prepend it to every command line.
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"

# Colors for output (literal escape sequences, expanded when printed)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Helper functions

# Print an informational message to stdout with a blue [INFO] tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text, printed verbatim
log_info() {
    # %b expands the colour escapes; %s keeps backslashes in the message literal.
    printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
}

# Print a success message to stdout with a green [SUCCESS] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed verbatim
log_success() {
    # %b expands the colour escapes; %s keeps backslashes in the message literal.
    printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"
}

# Print a warning message to stdout with a yellow [WARNING] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed verbatim
log_warning() {
    # %b expands the colour escapes; %s keeps backslashes in the message literal.
    printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1"
}

# Print an error message to stdout with a red [ERROR] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
log_error() {
    # %b expands the colour escapes; %s keeps backslashes in the message literal.
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1"
}

# Announce, run and report on one command line.
# Arguments:
#   $1 - command line as a single string; it is evaluated by the shell, so
#        quoted arguments inside it (e.g. 'Hello world') stay single words
#   $2 - human-readable description printed before the command runs
# Outputs:   description, the command line, then the captured stdout+stderr
#            tagged as success or failure
# Returns:   0 when the command exits 0, 1 otherwise
execute_command() {
    local cmd="$1"
    local description="$2"
    local result

    echo
    log_info "Executing: $description"
    printf 'Command: %s\n' "$cmd"

    # A plain $cmd expansion only splits on whitespace: quotes embedded in the
    # string would reach the program as literal characters and quoted phrases
    # would be broken apart. eval re-parses the string with shell quoting rules.
    # NOTE: only pass trusted, script-authored command lines here.
    if result=$(eval "$cmd" 2>&1); then
        log_success "Result: $result"
    else
        log_error "Failed: $result"
        return 1
    fi
}

# Check if HeroDB is running
# Sends a ping through the configured client and exits the script with
# status 1 when the client is missing or the ping fails.
# Globals:   REDIS_CLI, REDIS_PORT (read)
check_herodb() {
    # First word of REDIS_CLI is the client executable itself.
    local client="${REDIS_CLI%% *}"

    log_info "Checking if HeroDB is running..."

    # Report a missing client separately so it is not mistaken for a dead server.
    if ! command -v "$client" > /dev/null 2>&1; then
        log_error "Client '$client' was not found in PATH."
        exit 1
    fi

    # REDIS_CLI is left unquoted on purpose: it carries the client and its options.
    if ! $REDIS_CLI ping > /dev/null 2>&1; then
        log_error "HeroDB is not running. Please start it first:"
        echo " cargo run -- --dir ./test_data --port $REDIS_PORT"
        exit 1
    fi
    log_success "HeroDB is running"
}

# Setup embedding service configuration
# Stores the embedding endpoint URL in the config:core:aiembed hash and warns
# that the endpoint used here is only a placeholder.
# Globals:   REDIS_CLI (read)
setup_embedding_service() {
    # Single source for the URL used in both the command and the warning.
    local embed_url="http://localhost:8080/embed"

    log_info "Setting up embedding service configuration..."

    # Note: This is a mock URL for demonstration
    # In production, replace with your actual embedding service
    execute_command \
        "$REDIS_CLI HSET config:core:aiembed url '$embed_url'" \
        "Configure embedding service URL"

    # Optional: Set authentication token
    # execute_command \
    #     "$REDIS_CLI HSET config:core:aiembed token 'your-api-token'" \
    #     "Configure embedding service token"

    log_warning "Note: Embedding service at $embed_url is not running."
    log_warning "Some operations will fail, but this demonstrates the command structure."
}

# Dataset Management Operations
# Lists datasets, creates the documents/products/images datasets, lists
# again and prints INFO for two of them.
# Globals:   REDIS_CLI (read)
demo_dataset_management() {
    echo
    echo "=========================================="
    echo " DATASET MANAGEMENT DEMO"
    echo "=========================================="

    # List datasets (should be empty initially)
    execute_command \
        "$REDIS_CLI LANCE LIST" \
        "List all datasets (initially empty)"

    # Create a simple dataset
    execute_command \
        "$REDIS_CLI LANCE CREATE documents DIM 384" \
        "Create a simple document dataset with 384 dimensions"

    # Create a dataset with schema
    execute_command \
        "$REDIS_CLI LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool description:string" \
        "Create products dataset with custom schema"

    # Create an image dataset
    execute_command \
        "$REDIS_CLI LANCE CREATE images DIM 512 SCHEMA filename:string tags:string width:int height:int" \
        "Create images dataset for multimodal content"

    # List datasets again
    execute_command \
        "$REDIS_CLI LANCE LIST" \
        "List all datasets (should show 3 datasets)"

    # Get info about datasets
    execute_command \
        "$REDIS_CLI LANCE INFO documents" \
        "Get information about documents dataset"

    execute_command \
        "$REDIS_CLI LANCE INFO products" \
        "Get information about products dataset"
}

# Embedding Operations
# Issues LANCE EMBED.TEXT for one text and for several texts.
# Globals:   REDIS_CLI (read)
demo_embedding_operations() {
    echo
    echo "=========================================="
    echo " EMBEDDING OPERATIONS DEMO"
    echo "=========================================="

    log_warning "The following operations will fail because no embedding service is running."
    log_warning "This demonstrates the command structure and error handling."

    # `|| true` keeps the script going under `set -e` when a command fails.

    # Try to embed text (will fail without embedding service)
    execute_command \
        "$REDIS_CLI LANCE EMBED.TEXT 'Hello world'" \
        "Generate embedding for single text" || true

    # Try to embed multiple texts
    execute_command \
        "$REDIS_CLI LANCE EMBED.TEXT 'Machine learning' 'Artificial intelligence' 'Deep learning'" \
        "Generate embeddings for multiple texts" || true
}

# Data Storage Operations
# Issues LANCE STORE for text documents, a product, an image and a combined
# text + image item, each with metadata key/value pairs.
# Globals:   REDIS_CLI (read)
demo_data_storage() {
    # Placeholder base64 image (1x1 pixel PNG) shared by both image examples.
    local pixel_png='iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=='

    echo
    echo "=========================================="
    echo " DATA STORAGE DEMO"
    echo "=========================================="

    log_warning "Storage operations will fail without embedding service, but show command structure."

    # `|| true` keeps the script going under `set -e` when a command fails.

    # Store text documents
    execute_command \
        "$REDIS_CLI LANCE STORE documents TEXT 'Introduction to machine learning algorithms and their applications in modern AI systems' category 'education' author 'John Doe' difficulty 'beginner'" \
        "Store a document with text and metadata" || true

    execute_command \
        "$REDIS_CLI LANCE STORE documents TEXT 'Deep learning neural networks for computer vision tasks' category 'research' author 'Jane Smith' difficulty 'advanced'" \
        "Store another document" || true

    # Store product information
    execute_command \
        "$REDIS_CLI LANCE STORE products TEXT 'High-performance laptop with 16GB RAM and SSD storage' category 'electronics' price '1299.99' available 'true'" \
        "Store product with text description" || true

    # Store image with metadata (using placeholder base64)
    execute_command \
        "$REDIS_CLI LANCE STORE images IMAGE '$pixel_png' filename 'sample.png' tags 'test,demo' width '1' height '1'" \
        "Store image with metadata (1x1 pixel PNG)" || true

    # Store multimodal content
    execute_command \
        "$REDIS_CLI LANCE STORE images TEXT 'Beautiful sunset over mountains' IMAGE '$pixel_png' filename 'sunset.png' tags 'nature,landscape' location 'California'" \
        "Store multimodal content (text + image)" || true
}

# Search Operations
# Issues LANCE SEARCH (raw vector) and LANCE SEARCH.TEXT (text query)
# commands with and without the optional NPROBES / REFINE parameters.
# Globals:   REDIS_CLI (read)
demo_search_operations() {
    echo
    echo "=========================================="
    echo " SEARCH OPERATIONS DEMO"
    echo "=========================================="

    log_warning "Search operations will fail without data, but show command structure."

    # `|| true` keeps the script going under `set -e` when a command fails.

    # Search with raw vector
    execute_command \
        "$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 5" \
        "Search with raw vector (5 results)" || true

    # Search with vector and parameters
    execute_command \
        "$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 10 NPROBES 20 REFINE 2" \
        "Search with vector and advanced parameters" || true

    # Text-based search
    execute_command \
        "$REDIS_CLI LANCE SEARCH.TEXT documents 'machine learning algorithms' K 5" \
        "Search using text query" || true

    # Text search with parameters
    execute_command \
        "$REDIS_CLI LANCE SEARCH.TEXT products 'laptop computer' K 3 NPROBES 10" \
        "Search products using text with parameters" || true

    # Search in image dataset
    execute_command \
        "$REDIS_CLI LANCE SEARCH.TEXT images 'sunset landscape' K 5" \
        "Search images using text description" || true
}

# Index Management Operations
# Issues LANCE CREATE.INDEX for the documents, products and images datasets.
# Globals:   REDIS_CLI (read)
demo_index_management() {
    echo
    echo "=========================================="
    echo " INDEX MANAGEMENT DEMO"
    echo "=========================================="

    # Create indexes for better search performance
    execute_command \
        "$REDIS_CLI LANCE CREATE.INDEX documents IVF_PQ" \
        "Create default IVF_PQ index for documents"

    execute_command \
        "$REDIS_CLI LANCE CREATE.INDEX products IVF_PQ PARTITIONS 512 SUBVECTORS 32" \
        "Create IVF_PQ index with custom parameters for products"

    execute_command \
        "$REDIS_CLI LANCE CREATE.INDEX images IVF_PQ PARTITIONS 256 SUBVECTORS 16" \
        "Create IVF_PQ index for images dataset"

    log_success "Indexes created successfully"
}

# Advanced Usage Examples
# Creates the semantic_search dataset, prints a batch-store sketch, then
# lists all datasets and requests INFO for each known dataset.
# Globals:   REDIS_CLI (read)
demo_advanced_usage() {
    # Keep the loop variable out of the global scope.
    local dataset

    echo
    echo "=========================================="
    echo " ADVANCED USAGE EXAMPLES"
    echo "=========================================="

    # Create a specialized dataset for semantic search
    execute_command \
        "$REDIS_CLI LANCE CREATE semantic_search DIM 1536 SCHEMA title:string content:string url:string timestamp:string source:string" \
        "Create dataset for semantic search with rich metadata"

    # Demonstrate batch operations concept (printed only, nothing is executed)
    log_info "Batch operations example (would store multiple items):"
    echo " for doc in documents:"
    echo " LANCE STORE semantic_search TEXT \"\$doc_content\" title \"\$title\" url \"\$url\""

    # Show monitoring commands
    log_info "Monitoring and maintenance commands:"
    execute_command \
        "$REDIS_CLI LANCE LIST" \
        "List all datasets for monitoring"

    # Show dataset statistics; `|| true` lets the loop continue past a failure
    for dataset in documents products images semantic_search; do
        execute_command \
            "$REDIS_CLI LANCE INFO $dataset" \
            "Get statistics for $dataset" || true
    done
}

# Cleanup Operations
# Drops the semantic_search dataset, lists what is left, then asks whether
# the remaining test datasets should be dropped as well.
# Globals:   REDIS_CLI (read), REPLY (written by read)
demo_cleanup() {
    echo
    echo "=========================================="
    echo " CLEANUP OPERATIONS DEMO"
    echo "=========================================="

    log_info "Demonstrating cleanup operations..."

    # Drop individual datasets
    execute_command \
        "$REDIS_CLI LANCE DROP semantic_search" \
        "Drop semantic_search dataset"

    # List remaining datasets
    execute_command \
        "$REDIS_CLI LANCE LIST" \
        "List remaining datasets"

    # Ask user if they want to clean up all test data
    echo
    # read returns non-zero on EOF (stdin closed or exhausted); without the
    # fallback `set -e` would abort the script here. EOF is treated as "no".
    read -p "Do you want to clean up all test datasets? (y/N): " -n 1 -r || REPLY=""
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        execute_command \
            "$REDIS_CLI LANCE DROP documents" \
            "Drop documents dataset"

        execute_command \
            "$REDIS_CLI LANCE DROP products" \
            "Drop products dataset"

        execute_command \
            "$REDIS_CLI LANCE DROP images" \
            "Drop images dataset"

        execute_command \
            "$REDIS_CLI LANCE LIST" \
            "Verify all datasets are cleaned up"

        log_success "All test datasets cleaned up"
    else
        log_info "Keeping test datasets for further experimentation"
    fi
}

# Error Handling Demo
# Issues commands against a dataset name that is not created by this script
# and a search with a non-numeric vector.
# Globals:   REDIS_CLI (read)
demo_error_handling() {
    echo
    echo "=========================================="
    echo " ERROR HANDLING DEMO"
    echo "=========================================="

    log_info "Demonstrating various error conditions..."

    # `|| true` keeps the script going under `set -e` when a command fails.

    # Try to access non-existent dataset
    execute_command \
        "$REDIS_CLI LANCE INFO nonexistent_dataset" \
        "Try to get info for non-existent dataset" || true

    # Try to search non-existent dataset
    execute_command \
        "$REDIS_CLI LANCE SEARCH nonexistent_dataset VECTOR '0.1,0.2' K 5" \
        "Try to search non-existent dataset" || true

    # Try to drop non-existent dataset
    execute_command \
        "$REDIS_CLI LANCE DROP nonexistent_dataset" \
        "Try to drop non-existent dataset" || true

    # Try invalid vector format
    execute_command \
        "$REDIS_CLI LANCE SEARCH documents VECTOR 'invalid,vector,format' K 5" \
        "Try search with invalid vector format" || true

    log_info "Error handling demonstration complete"
}

# Performance Testing Demo
# Creates a perf_test dataset, prints an outline of what a performance test
# would cover plus example timing commands, then drops the dataset again.
# Globals:   REDIS_CLI (read)
demo_performance_testing() {
    echo
    echo "=========================================="
    echo " PERFORMANCE TESTING DEMO"
    echo "=========================================="

    log_info "Creating performance test dataset..."
    execute_command \
        "$REDIS_CLI LANCE CREATE perf_test DIM 128 SCHEMA batch_id:string item_id:string" \
        "Create performance test dataset"

    # The list below is printed only; no measurements are taken.
    log_info "Performance testing would involve:"
    echo " 1. Bulk loading thousands of vectors"
    echo " 2. Creating indexes with different parameters"
    echo " 3. Measuring search latency with various K values"
    echo " 4. Testing different NPROBES settings"
    echo " 5. Monitoring memory usage"

    log_info "Example performance test commands:"
    echo " # Test search speed with different parameters"
    echo " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10"
    echo " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10 NPROBES 50"
    echo " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 100 NPROBES 100"

    # Clean up performance test dataset
    execute_command \
        "$REDIS_CLI LANCE DROP perf_test" \
        "Clean up performance test dataset"
}

# Main execution
# Prints the intro banner, verifies the server is reachable, configures the
# embedding endpoint, runs every demo section in order, finishes with the
# cleanup section and prints the closing notes.
main() {
    echo "=========================================="
    echo " LANCE VECTOR DATABASE DEMO SCRIPT"
    echo "=========================================="
    echo
    echo "This script demonstrates all Lance vector database operations."
    echo "Note: Some operations will fail without a running embedding service."
    echo "This is expected and demonstrates error handling."
    echo

    # Check prerequisites
    check_herodb

    # Setup
    setup_embedding_service

    # Run demos
    demo_dataset_management
    demo_embedding_operations
    demo_data_storage
    demo_search_operations
    demo_index_management
    demo_advanced_usage
    demo_error_handling
    demo_performance_testing

    # Cleanup
    demo_cleanup

    echo
    echo "=========================================="
    echo " DEMO COMPLETE"
    echo "=========================================="
    echo
    log_success "Lance vector database demo completed successfully!"
    echo
    echo "Next steps:"
    echo "1. Set up a real embedding service (OpenAI, Hugging Face, etc.)"
    echo "2. Update the embedding service URL configuration"
    echo "3. Try storing and searching real data"
    echo "4. Experiment with different vector dimensions and index parameters"
    echo "5. Build your AI-powered application!"
    echo
    echo "For more information, see docs/lance_vector_db.md"
}

# Run the demo, forwarding any command-line arguments to main
main "$@"