...
This commit is contained in:
535
Cargo.lock
generated
535
Cargo.lock
generated
@@ -235,40 +235,19 @@ version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3f15b4c6b148206ff3a2b35002e08929c2462467b62b9c02036d9c34f9ef994"
|
||||
dependencies = [
|
||||
"arrow-arith 55.2.0",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-cast 55.2.0",
|
||||
"arrow-csv 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-ipc 55.2.0",
|
||||
"arrow-json 55.2.0",
|
||||
"arrow-ord 55.2.0",
|
||||
"arrow-row 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"arrow-string 55.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd798aea3553913a5986813e9c6ad31a2d2b04e931fe8ea4a37155eb541cebb5"
|
||||
dependencies = [
|
||||
"arrow-arith 56.0.0",
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-cast 56.0.0",
|
||||
"arrow-csv 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-ipc 56.0.0",
|
||||
"arrow-json 56.0.0",
|
||||
"arrow-ord 56.0.0",
|
||||
"arrow-row 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-select 56.0.0",
|
||||
"arrow-string 56.0.0",
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-csv",
|
||||
"arrow-data",
|
||||
"arrow-ipc",
|
||||
"arrow-json",
|
||||
"arrow-ord",
|
||||
"arrow-row",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"arrow-string",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -277,24 +256,10 @@ version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"chrono",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-arith"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "508dafb53e5804a238cab7fd97a59ddcbfab20cc4d9814b1ab5465b9fa147f2e"
|
||||
dependencies = [
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"num",
|
||||
]
|
||||
@@ -306,9 +271,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"chrono-tz",
|
||||
"half",
|
||||
@@ -316,22 +281,6 @@ dependencies = [
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-array"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2730bc045d62bb2e53ef8395b7d4242f5c8102f41ceac15e8395b9ac3d08461"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"chrono",
|
||||
"half",
|
||||
"hashbrown 0.15.5",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-buffer"
|
||||
version = "55.2.0"
|
||||
@@ -343,28 +292,17 @@ dependencies = [
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-buffer"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "54295b93beb702ee9a6f6fbced08ad7f4d76ec1c297952d4b83cf68755421d1d"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"half",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-cast"
|
||||
version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"atoi",
|
||||
"base64 0.22.1",
|
||||
"chrono",
|
||||
@@ -375,50 +313,15 @@ dependencies = [
|
||||
"ryu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-cast"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67e8bcb7dc971d779a7280593a1bf0c2743533b8028909073e804552e85e75b5"
|
||||
dependencies = [
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-select 56.0.0",
|
||||
"atoi",
|
||||
"base64 0.22.1",
|
||||
"chrono",
|
||||
"half",
|
||||
"lexical-core",
|
||||
"num",
|
||||
"ryu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-csv"
|
||||
version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-cast 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"chrono",
|
||||
"csv",
|
||||
"csv-core",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-csv"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "673fd2b5fb57a1754fdbfac425efd7cf54c947ac9950c1cce86b14e248f1c458"
|
||||
dependencies = [
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-cast 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-array",
|
||||
"arrow-cast",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"csv",
|
||||
"csv-core",
|
||||
@@ -431,20 +334,8 @@ version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2"
|
||||
dependencies = [
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"half",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-data"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97c22fe3da840039c69e9f61f81e78092ea36d57037b4900151f063615a2f6b4"
|
||||
dependencies = [
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
"half",
|
||||
"num",
|
||||
]
|
||||
@@ -455,61 +346,26 @@ version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"flatbuffers",
|
||||
"lz4_flex",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-ipc"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "778de14c5a69aedb27359e3dd06dd5f9c481d5f6ee9fbae912dba332fd64636b"
|
||||
dependencies = [
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"flatbuffers",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-json"
|
||||
version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-cast 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"chrono",
|
||||
"half",
|
||||
"indexmap",
|
||||
"lexical-core",
|
||||
"memchr",
|
||||
"num",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"simdutf8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-json"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3860db334fe7b19fcf81f6b56f8d9d95053f3839ffe443d56b5436f7a29a1794"
|
||||
dependencies = [
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-cast 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"half",
|
||||
"indexmap",
|
||||
@@ -527,24 +383,11 @@ version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-ord"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "425fa0b42a39d3ff55160832e7c25553e7f012c3f187def3d70313e7a29ba5d9"
|
||||
dependencies = [
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-select 56.0.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -553,23 +396,10 @@ version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"half",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-row"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df9c9423c9e71abd1b08a7f788fcd203ba2698ac8e72a1f236f1faa1a06a7414"
|
||||
dependencies = [
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"half",
|
||||
]
|
||||
|
||||
@@ -584,12 +414,6 @@ dependencies = [
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-schema"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85fa1babc4a45fdc64a92175ef51ff00eba5ebbc0007962fecf8022ac1c6ce28"
|
||||
|
||||
[[package]]
|
||||
name = "arrow-select"
|
||||
version = "55.2.0"
|
||||
@@ -597,24 +421,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"num",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-select"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8854d15f1cf5005b4b358abeb60adea17091ff5bdd094dca5d3f73787d81170"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"num",
|
||||
]
|
||||
|
||||
@@ -624,28 +434,11 @@ version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"memchr",
|
||||
"num",
|
||||
"regex",
|
||||
"regex-syntax 0.8.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-string"
|
||||
version = "56.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c477e8b89e1213d5927a2a84a72c384a9bf4dd0dbf15f9fd66d821aafd9e95e"
|
||||
dependencies = [
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-select 56.0.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"memchr",
|
||||
"num",
|
||||
"regex",
|
||||
@@ -1863,9 +1656,9 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a11e19a7ccc5bb979c95c1dceef663eab39c9061b3bbf8d1937faf0f03bf41f"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow-ipc 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
"arrow-schema",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"chrono",
|
||||
@@ -1911,7 +1704,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94985e67cab97b1099db2a7af11f31a45008b282aba921c1e1d35327c212ec18"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"async-trait",
|
||||
"dashmap 6.1.0",
|
||||
"datafusion-common",
|
||||
@@ -1937,7 +1730,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e002df133bdb7b0b9b429d89a69aa77b35caeadee4498b2ce1c7c23a99516988"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"async-trait",
|
||||
"datafusion-catalog",
|
||||
"datafusion-common",
|
||||
@@ -1961,8 +1754,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e13242fc58fd753787b0a538e5ae77d356cb9d0656fa85a591a33c5f106267f6"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow 55.2.0",
|
||||
"arrow-ipc 55.2.0",
|
||||
"arrow",
|
||||
"arrow-ipc",
|
||||
"base64 0.22.1",
|
||||
"half",
|
||||
"hashbrown 0.14.5",
|
||||
@@ -1993,7 +1786,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2cf792579bc8bf07d1b2f68c2d5382f8a63679cce8fbebfd4ba95742b6e08864"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"chrono",
|
||||
@@ -2021,7 +1814,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cfc114f9a1415174f3e8d2719c371fc72092ef2195a7955404cfe6b2ba29a706"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"datafusion-catalog",
|
||||
@@ -2046,7 +1839,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d88dd5e215c420a52362b9988ecd4cefd71081b730663d4f7d886f706111fc75"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"datafusion-catalog",
|
||||
@@ -2077,7 +1870,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9609d83d52ff8315283c6dad3b97566e877d8f366fab4c3297742f33dcd636c7"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"dashmap 6.1.0",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
@@ -2096,7 +1889,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e75230cd67f650ef0399eb00f54d4a073698f2c0262948298e5299fc7324da63"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"chrono",
|
||||
"datafusion-common",
|
||||
"datafusion-doc",
|
||||
@@ -2116,7 +1909,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70fafb3a045ed6c49cfca0cd090f62cf871ca6326cc3355cb0aaf1260fa760b6"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"indexmap",
|
||||
"itertools 0.14.0",
|
||||
@@ -2129,8 +1922,8 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdf9a9cf655265861a20453b1e58357147eab59bdc90ce7f2f68f1f35104d3bb"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow",
|
||||
"arrow-buffer",
|
||||
"base64 0.22.1",
|
||||
"blake2",
|
||||
"blake3",
|
||||
@@ -2159,7 +1952,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f07e49733d847be0a05235e17b884d326a2fd402c97a89fe8bcf0bfba310005"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"datafusion-doc",
|
||||
"datafusion-execution",
|
||||
@@ -2180,7 +1973,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4512607e10d72b0b0a1dc08f42cb5bd5284cb8348b7fea49dc83409493e32b1b"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"datafusion-expr-common",
|
||||
"datafusion-physical-expr-common",
|
||||
@@ -2192,8 +1985,8 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ab331806e34f5545e5f03396e4d5068077395b1665795d8f88c14ec4f1e0b7a"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow-ord 55.2.0",
|
||||
"arrow",
|
||||
"arrow-ord",
|
||||
"datafusion-common",
|
||||
"datafusion-doc",
|
||||
"datafusion-execution",
|
||||
@@ -2213,7 +2006,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d4ac2c0be983a06950ef077e34e0174aa0cb9e346f3aeae459823158037ade37"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"async-trait",
|
||||
"datafusion-catalog",
|
||||
"datafusion-common",
|
||||
@@ -2229,7 +2022,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "36f3d92731de384c90906941d36dcadf6a86d4128409a9c5cd916662baed5f53"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"datafusion-doc",
|
||||
"datafusion-expr",
|
||||
@@ -2268,7 +2061,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1594c7a97219ede334f25347ad8d57056621e7f4f35a0693c8da876e10dd6a53"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"chrono",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
@@ -2287,7 +2080,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc6da0f2412088d23f6b01929dedd687b5aee63b19b674eb73d00c3eb3c883b7"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"datafusion-expr-common",
|
||||
@@ -2309,7 +2102,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcb0dbd9213078a593c3fe28783beaa625a4e6c6a6c797856ee2ba234311fb96"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"datafusion-expr-common",
|
||||
"hashbrown 0.14.5",
|
||||
@@ -2322,7 +2115,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d140854b2db3ef8ac611caad12bfb2e1e1de827077429322a6188f18fc0026a"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"datafusion-common",
|
||||
"datafusion-execution",
|
||||
"datafusion-expr",
|
||||
@@ -2341,9 +2134,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b46cbdf21a01206be76d467f325273b22c559c744a012ead5018dfe79597de08"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow 55.2.0",
|
||||
"arrow-ord 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"async-trait",
|
||||
"chrono",
|
||||
"datafusion-common",
|
||||
@@ -2370,7 +2163,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a72733766ddb5b41534910926e8da5836622316f6283307fd9fb7e19811a59c"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"async-trait",
|
||||
"dashmap 6.1.0",
|
||||
"datafusion-common",
|
||||
@@ -2394,7 +2187,7 @@ version = "48.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c5162338cdec9cc7ea13a0e6015c361acad5ec1d88d83f7c86301f789473971f"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow",
|
||||
"bigdecimal",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
@@ -2803,7 +2596,7 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "548190a42654ce848835b410ae33f43b4d55cb24548fd0a885a289a1d5a95019"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-array",
|
||||
"rand 0.9.2",
|
||||
]
|
||||
|
||||
@@ -3093,9 +2886,9 @@ version = "0.0.1"
|
||||
dependencies = [
|
||||
"age",
|
||||
"anyhow",
|
||||
"arrow 56.0.0",
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-schema",
|
||||
"base64 0.22.1",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@@ -3759,15 +3552,15 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94bafd9d9a9301c1eac48892ec8016d4d28204d4fc55f2ebebee9a7af465e152"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow-arith 55.2.0",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-ipc 55.2.0",
|
||||
"arrow-ord 55.2.0",
|
||||
"arrow-row 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-ipc",
|
||||
"arrow-ord",
|
||||
"arrow-row",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"async-recursion",
|
||||
"async-trait",
|
||||
"async_cell",
|
||||
@@ -3822,12 +3615,12 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b97ebcd8edc2b534e8ded20c97c8928e275160794af91ed803a3d48d8d2a88d8"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-cast 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"bytes",
|
||||
"getrandom 0.2.16",
|
||||
"half",
|
||||
@@ -3841,9 +3634,9 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce5c1849d07985d6a5011aca9de43c7a42ec4c996d66ef3f2d9896c227cc934c"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-schema",
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
@@ -3878,12 +3671,12 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d355c087bc66d85e36cfb428465f585b13971e1e13585dd2b6886a54d8a7d9a4"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-ord 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"async-trait",
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
@@ -3908,10 +3701,10 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "110d4dedfe02e9cff8f11cfb64a261755da7ee9131845197efeec8b659cc5513"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-cast 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-cast",
|
||||
"arrow-schema",
|
||||
"chrono",
|
||||
"futures",
|
||||
"hex",
|
||||
@@ -3927,14 +3720,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "66750006299a2fb003091bc290eb1fe2a5933e35236d921934131f3e4629cd33"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrow 55.2.0",
|
||||
"arrow-arith 55.2.0",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-cast 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
@@ -3967,12 +3760,12 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c639062100610a075e01fd455173348b2fccea10cb0e89f70e38a3183c56022"
|
||||
dependencies = [
|
||||
"arrow-arith 55.2.0",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"async-recursion",
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -4003,11 +3796,11 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ae67a048a51fb525d1bfde86d1b39118462277e7e7a7cd0e7ba866312873532"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-ord 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"async-channel",
|
||||
"async-recursion",
|
||||
"async-trait",
|
||||
@@ -4058,14 +3851,14 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc86c7307e2d3d895cfefa503f986edcbdd208eb0aa89ba2c75724ba04bce843"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow-arith 55.2.0",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-cast 55.2.0",
|
||||
"arrow-data 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-select 55.2.0",
|
||||
"arrow",
|
||||
"arrow-arith",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"async-priority-channel",
|
||||
"async-recursion",
|
||||
"async-trait",
|
||||
@@ -4100,10 +3893,10 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "769f910b6f2ad5eb4d1b3071c533b619351e61e0dfca74f13c98680a8e6476e9"
|
||||
dependencies = [
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-ord 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-ord",
|
||||
"arrow-schema",
|
||||
"bitvec",
|
||||
"cc",
|
||||
"deepsize",
|
||||
@@ -4125,11 +3918,11 @@ version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffbeafa8a3e97b5b3a06f06d69b0cefe56e65c64a33f674c40c113b797328bd2"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"arrow-array 55.2.0",
|
||||
"arrow-buffer 55.2.0",
|
||||
"arrow-ipc 55.2.0",
|
||||
"arrow-schema 55.2.0",
|
||||
"arrow",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-ipc",
|
||||
"arrow-schema",
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
@@ -4923,18 +4716,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "parquet"
|
||||
version = "56.0.0"
|
||||
version = "55.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7288a07ed5d25939a90f9cb1ca5afa6855faa08ec7700613511ae64bdb0620c"
|
||||
checksum = "b17da4150748086bd43352bc77372efa9b6e3dbd06a04831d2a98c041c225cfa"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"arrow-array 56.0.0",
|
||||
"arrow-buffer 56.0.0",
|
||||
"arrow-cast 56.0.0",
|
||||
"arrow-data 56.0.0",
|
||||
"arrow-ipc 56.0.0",
|
||||
"arrow-schema 56.0.0",
|
||||
"arrow-select 56.0.0",
|
||||
"arrow-array",
|
||||
"arrow-buffer",
|
||||
"arrow-cast",
|
||||
"arrow-data",
|
||||
"arrow-ipc",
|
||||
"arrow-schema",
|
||||
"arrow-select",
|
||||
"base64 0.22.1",
|
||||
"brotli",
|
||||
"bytes",
|
||||
|
@@ -28,10 +28,11 @@ base64 = "0.22"
|
||||
lance = "0.33"
|
||||
lance-index = "0.33"
|
||||
lance-linalg = "0.33"
|
||||
arrow = "56"
|
||||
arrow-array = "56"
|
||||
arrow-schema = "56"
|
||||
parquet = "56"
|
||||
# Pin Arrow and Parquet to 55.2, the release line Lance 0.33 depends on, so only one Arrow version is built
|
||||
arrow = "55.2"
|
||||
arrow-array = "55.2"
|
||||
arrow-schema = "55.2"
|
||||
parquet = "55.2"
|
||||
uuid = { version = "1.10", features = ["v4"] }
|
||||
reqwest = { version = "0.11", features = ["json"] }
|
||||
image = "0.25"
|
||||
|
454
docs/lance_vector_db.md
Normal file
454
docs/lance_vector_db.md
Normal file
@@ -0,0 +1,454 @@
|
||||
# Lance Vector Database Operations
|
||||
|
||||
HeroDB includes a powerful vector database integration using Lance, enabling high-performance vector storage, search, and multimodal data management. By default, it uses Ollama for local text embeddings, with support for custom external embedding services.
|
||||
|
||||
## Overview
|
||||
|
||||
The Lance vector database integration provides:
|
||||
|
||||
- **High-performance vector storage** using Lance's columnar format
|
||||
- **Local Ollama integration** for text embeddings (default, no external dependencies)
|
||||
- **Custom embedding service support** for advanced use cases
|
||||
- **Text embedding support** out of the box (image embeddings require a custom embedding service)
|
||||
- **Vector similarity search** with configurable parameters
|
||||
- **Scalable indexing** with IVF_PQ (Inverted File with Product Quantization)
|
||||
- **Redis-compatible command interface**
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
|
||||
│ HeroDB │ │ External │ │ Lance │
|
||||
│ Redis Server │◄──►│ Embedding │ │ Vector Store │
|
||||
│ │ │ Service │ │ │
|
||||
└─────────────────┘ └──────────────────┘ └─────────────────┘
|
||||
│ │ │
|
||||
│ │ │
|
||||
Redis Protocol HTTP API Arrow/Parquet
|
||||
Commands JSON Requests Columnar Storage
|
||||
```
|
||||
|
||||
### Key Components
|
||||
|
||||
1. **Lance Store**: High-performance columnar vector storage
|
||||
2. **Ollama Integration**: Local embedding service (default)
|
||||
3. **Custom Embedding Service**: Optional HTTP API for advanced use cases
|
||||
4. **Redis Command Interface**: Familiar Redis-style commands
|
||||
5. **Arrow Schema**: Flexible schema definition for metadata
|
||||
|
||||
## Configuration
|
||||
|
||||
### Default Setup (Ollama)
|
||||
|
||||
HeroDB uses Ollama by default for text embeddings. No configuration is required if Ollama is running locally:
|
||||
|
||||
```bash
|
||||
# Install Ollama (if not already installed)
|
||||
# Visit: https://ollama.ai
|
||||
|
||||
# Pull the embedding model
|
||||
ollama pull nomic-embed-text
|
||||
|
||||
# Ollama automatically runs on localhost:11434
|
||||
# HeroDB will use this by default
|
||||
```
|
||||
|
||||
**Default Configuration:**
|
||||
- **URL**: `http://localhost:11434`
|
||||
- **Model**: `nomic-embed-text`
|
||||
- **Dimensions**: 768 (for nomic-embed-text)
|
||||
|
||||
### Custom Embedding Service (Optional)
|
||||
|
||||
To use a custom embedding service instead of Ollama:
|
||||
|
||||
```bash
|
||||
# Set custom embedding service URL
|
||||
redis-cli HSET config:core:aiembed url "http://your-embedding-service:8080/embed"
|
||||
|
||||
# Optional: Set authentication if required
|
||||
redis-cli HSET config:core:aiembed token "your-api-token"
|
||||
```
|
||||
|
||||
### Embedding Service API Contracts
|
||||
|
||||
#### Ollama API (Default)
|
||||
HeroDB calls Ollama using this format:
|
||||
|
||||
```bash
|
||||
POST http://localhost:11434/api/embeddings
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "nomic-embed-text",
|
||||
"prompt": "Your text to embed"
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"embedding": [0.1, 0.2, 0.3, ...]
|
||||
}
|
||||
```
|
||||
|
||||
#### Custom Service API
|
||||
Your custom embedding service should accept POST requests with this JSON format:
|
||||
|
||||
```json
|
||||
{
|
||||
"texts": ["text1", "text2"], // Optional: array of texts
|
||||
"images": ["base64_image1", "base64_image2"], // Optional: base64 encoded images
|
||||
"model": "your-model-name" // Optional: model specification
|
||||
}
|
||||
```
|
||||
|
||||
And return responses in this format:
|
||||
|
||||
```json
|
||||
{
|
||||
"embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]], // Array of embedding vectors
|
||||
"model": "model-name", // Model used
|
||||
"usage": { // Optional usage stats
|
||||
"tokens": 100,
|
||||
"requests": 2
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Commands Reference
|
||||
|
||||
### Dataset Management
|
||||
|
||||
#### LANCE CREATE
|
||||
Create a new vector dataset with specified dimensions and optional schema.
|
||||
|
||||
```bash
|
||||
LANCE CREATE <dataset> DIM <dimension> [SCHEMA field:type ...]
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `dataset`: Name of the dataset
|
||||
- `dimension`: Vector dimension (e.g., 384, 768, 1536)
|
||||
- `field:type`: Optional metadata fields (string, int, float, bool)
|
||||
|
||||
**Examples:**
|
||||
```bash
|
||||
# Create a simple dataset for 384-dimensional vectors
|
||||
LANCE CREATE documents DIM 384
|
||||
|
||||
# Create dataset with metadata schema
|
||||
LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool
|
||||
```
|
||||
|
||||
#### LANCE LIST
|
||||
List all available datasets.
|
||||
|
||||
```bash
|
||||
LANCE LIST
|
||||
```
|
||||
|
||||
**Returns:** Array of dataset names
|
||||
|
||||
#### LANCE INFO
|
||||
Get information about a specific dataset.
|
||||
|
||||
```bash
|
||||
LANCE INFO <dataset>
|
||||
```
|
||||
|
||||
**Returns:** Dataset metadata including name, version, row count, and schema
|
||||
|
||||
#### LANCE DROP
|
||||
Delete a dataset and all its data.
|
||||
|
||||
```bash
|
||||
LANCE DROP <dataset>
|
||||
```
|
||||
|
||||
### Data Operations
|
||||
|
||||
#### LANCE STORE
|
||||
Store multimodal data (text/images) with automatic embedding generation.
|
||||
|
||||
```bash
|
||||
LANCE STORE <dataset> [TEXT <text>] [IMAGE <base64>] [key value ...]
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `dataset`: Target dataset name
|
||||
- `TEXT`: Text content to embed
|
||||
- `IMAGE`: Base64-encoded image to embed
|
||||
- `key value`: Metadata key-value pairs
|
||||
|
||||
**Examples:**
|
||||
```bash
|
||||
# Store text with metadata
|
||||
LANCE STORE documents TEXT "Machine learning is transforming industries" category "AI" author "John Doe"
|
||||
|
||||
# Store image with metadata
|
||||
LANCE STORE images IMAGE "iVBORw0KGgoAAAANSUhEUgAA..." category "nature" tags "landscape,mountains"
|
||||
|
||||
# Store both text and image
|
||||
LANCE STORE multimodal TEXT "Beautiful sunset" IMAGE "base64data..." location "California"
|
||||
```
|
||||
|
||||
**Returns:** Unique ID of the stored item
|
||||
|
||||
### Search Operations
|
||||
|
||||
#### LANCE SEARCH
|
||||
Search using a raw vector.
|
||||
|
||||
```bash
|
||||
LANCE SEARCH <dataset> VECTOR <vector> K <k> [NPROBES <n>] [REFINE <r>]
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `dataset`: Dataset to search
|
||||
- `vector`: Comma-separated vector values (e.g., "0.1,0.2,0.3")
|
||||
- `k`: Number of results to return
|
||||
- `NPROBES`: Number of partitions to search (optional)
|
||||
- `REFINE`: Refine factor for better accuracy (optional)
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
LANCE SEARCH documents VECTOR "0.1,0.2,0.3,0.4" K 5 NPROBES 10
|
||||
```
|
||||
|
||||
#### LANCE SEARCH.TEXT
|
||||
Search using text query (automatically embedded).
|
||||
|
||||
```bash
|
||||
LANCE SEARCH.TEXT <dataset> <query_text> K <k> [NPROBES <n>] [REFINE <r>]
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `dataset`: Dataset to search
|
||||
- `query_text`: Text query to search for
|
||||
- `k`: Number of results to return
|
||||
- `NPROBES`: Number of partitions to search (optional)
|
||||
- `REFINE`: Refine factor for better accuracy (optional)
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
LANCE SEARCH.TEXT documents "artificial intelligence applications" K 10 NPROBES 20
|
||||
```
|
||||
|
||||
**Returns:** Array of results with distance scores and metadata
|
||||
|
||||
### Embedding Operations
|
||||
|
||||
#### LANCE EMBED.TEXT
|
||||
Generate embeddings for text without storing.
|
||||
|
||||
```bash
|
||||
LANCE EMBED.TEXT <text1> [text2] [text3] ...
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
LANCE EMBED.TEXT "Hello world" "Machine learning" "Vector database"
|
||||
```
|
||||
|
||||
**Returns:** Array of embedding vectors
|
||||
|
||||
### Index Management
|
||||
|
||||
#### LANCE CREATE.INDEX
|
||||
Create a vector index for faster search performance.
|
||||
|
||||
```bash
|
||||
LANCE CREATE.INDEX <dataset> <index_type> [PARTITIONS <n>] [SUBVECTORS <n>]
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `dataset`: Dataset to index
|
||||
- `index_type`: Index type (currently supports "IVF_PQ")
|
||||
- `PARTITIONS`: Number of partitions (default: 256)
|
||||
- `SUBVECTORS`: Number of sub-vectors for PQ (default: 16)
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
LANCE CREATE.INDEX documents IVF_PQ PARTITIONS 512 SUBVECTORS 32
|
||||
```
|
||||
|
||||
## Usage Patterns
|
||||
|
||||
### 1. Document Search System
|
||||
|
||||
```bash
|
||||
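# Setup (DIM must match the output size of your embedding model; the default nomic-embed-text produces 768-dimensional vectors)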
|
||||
LANCE CREATE documents DIM 384 SCHEMA title:string content:string category:string
|
||||
|
||||
# Store documents
|
||||
LANCE STORE documents TEXT "Introduction to machine learning algorithms" title "ML Basics" category "education"
|
||||
LANCE STORE documents TEXT "Deep learning neural networks explained" title "Deep Learning" category "education"
|
||||
LANCE STORE documents TEXT "Building scalable web applications" title "Web Dev" category "programming"
|
||||
|
||||
# Create index for better performance
|
||||
LANCE CREATE.INDEX documents IVF_PQ PARTITIONS 256
|
||||
|
||||
# Search
|
||||
LANCE SEARCH.TEXT documents "neural networks" K 5
|
||||
```
|
||||
|
||||
### 2. Image Similarity Search
|
||||
|
||||
```bash
|
||||
# Setup
|
||||
LANCE CREATE images DIM 512 SCHEMA filename:string tags:string
|
||||
|
||||
# Store images (base64 encoded)
|
||||
LANCE STORE images IMAGE "iVBORw0KGgoAAAANSUhEUgAA..." filename "sunset.jpg" tags "nature,landscape"
|
||||
LANCE STORE images IMAGE "iVBORw0KGgoAAAANSUhEUgBB..." filename "city.jpg" tags "urban,architecture"
|
||||
|
||||
# Search by image
|
||||
LANCE STORE temp_search IMAGE "query_image_base64..."
|
||||
# Then use the returned ID to get embedding and search
|
||||
```
|
||||
|
||||
### 3. Multimodal Content Management
|
||||
|
||||
```bash
|
||||
# Setup
|
||||
LANCE CREATE content DIM 768 SCHEMA type:string source:string
|
||||
|
||||
# Store mixed content
|
||||
LANCE STORE content TEXT "Product description for smartphone" type "product" source "catalog"
|
||||
LANCE STORE content IMAGE "product_image_base64..." type "product_image" source "catalog"
|
||||
|
||||
# Search across all content types
|
||||
LANCE SEARCH.TEXT content "smartphone features" K 10
|
||||
```
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Vector Dimensions
|
||||
- **384**: Good for general text (e.g., sentence-transformers)
|
||||
- **768**: Standard for BERT-like models
|
||||
- **1536**: OpenAI text-embedding-ada-002
|
||||
- **Higher dimensions**: Better accuracy but slower search
|
||||
|
||||
### Index Configuration
|
||||
- **More partitions**: Better for larger datasets (>100K vectors)
|
||||
- **More sub-vectors**: Better compression but slower search
|
||||
- **NPROBES**: Higher values = better accuracy, slower search
|
||||
|
||||
### Best Practices
|
||||
|
||||
1. **Create indexes** for datasets with >1000 vectors
|
||||
2. **Use appropriate dimensions** based on your embedding model
|
||||
3. **Configure NPROBES** based on accuracy vs speed requirements
|
||||
4. **Batch operations** when possible for better performance
|
||||
5. **Monitor embedding service** response times and rate limits
|
||||
|
||||
## Error Handling
|
||||
|
||||
Common error scenarios and solutions:
|
||||
|
||||
### Embedding Service Errors
|
||||
```bash
|
||||
# Error: Embedding service not configured
|
||||
ERR Embedding service URL not configured. Set it with: HSET config:core:aiembed url <YOUR_EMBEDDING_SERVICE_URL>
|
||||
|
||||
# Error: Service unavailable
|
||||
ERR Embedding service returned error 404 Not Found
|
||||
```
|
||||
|
||||
**Solution:** Ensure the embedding service is running and the configured URL is correct.
|
||||
|
||||
### Dataset Errors
|
||||
```bash
|
||||
# Error: Dataset doesn't exist
|
||||
ERR Dataset 'mydata' does not exist
|
||||
|
||||
# Error: Dimension mismatch
|
||||
ERR Vector dimension mismatch: expected 384, got 768
|
||||
```
|
||||
|
||||
**Solution:** Create dataset first or check vector dimensions.
|
||||
|
||||
### Search Errors
|
||||
```bash
|
||||
# Error: Invalid vector format
|
||||
ERR Invalid vector format
|
||||
|
||||
# Error: No index available
|
||||
ERR No index available for fast search
|
||||
```
|
||||
|
||||
**Solution:** Check vector format or create an index.
|
||||
|
||||
## Integration Examples
|
||||
|
||||
### With Python
|
||||
```python
|
||||
import redis
|
||||
import json
|
||||
|
||||
r = redis.Redis(host='localhost', port=6379)
|
||||
|
||||
# Create dataset
|
||||
r.execute_command('LANCE', 'CREATE', 'docs', 'DIM', '384')
|
||||
|
||||
# Store document
|
||||
result = r.execute_command('LANCE', 'STORE', 'docs',
|
||||
'TEXT', 'Machine learning tutorial',
|
||||
'category', 'education')
|
||||
print(f"Stored with ID: {result}")
|
||||
|
||||
# Search
|
||||
results = r.execute_command('LANCE', 'SEARCH.TEXT', 'docs',
|
||||
'machine learning', 'K', '5')
|
||||
print(f"Search results: {results}")
|
||||
```
|
||||
|
||||
### With Node.js
|
||||
```javascript
|
||||
const redis = require('redis');
|
||||
const client = redis.createClient();
|
||||
|
||||
// Create dataset
|
||||
await client.sendCommand(['LANCE', 'CREATE', 'docs', 'DIM', '384']);
|
||||
|
||||
// Store document
|
||||
const id = await client.sendCommand(['LANCE', 'STORE', 'docs',
|
||||
'TEXT', 'Deep learning guide',
|
||||
'category', 'AI']);
|
||||
|
||||
// Search
|
||||
const results = await client.sendCommand(['LANCE', 'SEARCH.TEXT', 'docs',
|
||||
'deep learning', 'K', '10']);
|
||||
```
|
||||
|
||||
## Monitoring and Maintenance
|
||||
|
||||
### Health Checks
|
||||
```bash
|
||||
# Check if Lance store is available
|
||||
LANCE LIST
|
||||
|
||||
# Check dataset health
|
||||
LANCE INFO mydataset
|
||||
|
||||
# Test embedding service
|
||||
LANCE EMBED.TEXT "test"
|
||||
```
|
||||
|
||||
### Maintenance Operations
|
||||
```bash
|
||||
# Backup: Use standard Redis backup procedures
|
||||
# The Lance data is stored separately in the data directory
|
||||
|
||||
# Cleanup: Remove unused datasets
|
||||
LANCE DROP old_dataset
|
||||
|
||||
# Rebuild: drop and recreate the dataset, re-import the data, then create the index again (LANCE DROP removes the dataset and all its data)
|
||||
LANCE DROP dataset_name
|
||||
LANCE CREATE dataset_name DIM 384
|
||||
# Re-import data
|
||||
LANCE CREATE.INDEX dataset_name IVF_PQ
|
||||
```
|
||||
|
||||
This integration provides a powerful foundation for building AI-powered applications with vector search capabilities while maintaining the familiar Redis interface.
|
@@ -1,6 +1,191 @@
|
||||
# HeroDB Tantivy Search Examples
|
||||
# HeroDB Examples
|
||||
|
||||
This directory contains examples demonstrating HeroDB's full-text search capabilities powered by Tantivy.
|
||||
This directory contains examples demonstrating HeroDB's capabilities including full-text search powered by Tantivy and vector database operations using Lance.
|
||||
|
||||
## Available Examples
|
||||
|
||||
1. **[Tantivy Search Demo](#tantivy-search-demo-bash-script)** - Full-text search capabilities
|
||||
2. **[Lance Vector Database Demo](#lance-vector-database-demo-bash-script)** - Vector database and AI operations
|
||||
3. **[AGE Encryption Demo](age_bash_demo.sh)** - Cryptographic operations
|
||||
4. **[Simple Demo](simple_demo.sh)** - Basic Redis operations
|
||||
|
||||
---
|
||||
|
||||
## Lance Vector Database Demo (Bash Script)
|
||||
|
||||
### Overview
|
||||
The `lance_vector_demo.sh` script provides a comprehensive demonstration of HeroDB's vector database capabilities using Lance. It showcases vector storage, similarity search, multimodal data handling, and AI-powered operations with external embedding services.
|
||||
|
||||
### Prerequisites
|
||||
1. **HeroDB Server**: The server must be running (default port 6379)
|
||||
2. **Redis CLI**: The `redis-cli` tool must be installed and available in your PATH
|
||||
3. **Embedding Service** (optional): For full functionality, set up an external embedding service
|
||||
|
||||
### Running the Demo
|
||||
|
||||
#### Step 1: Start HeroDB Server
|
||||
```bash
|
||||
# From the project root directory
|
||||
cargo run -- --dir ./test_data --port 6379
|
||||
```
|
||||
|
||||
#### Step 2: Run the Demo (in a new terminal)
|
||||
```bash
|
||||
# From the project root directory
|
||||
./examples/lance_vector_demo.sh
|
||||
```
|
||||
|
||||
### What the Demo Covers
|
||||
|
||||
The script demonstrates comprehensive vector database operations:
|
||||
|
||||
1. **Dataset Management**
|
||||
- Creating vector datasets with custom dimensions
|
||||
- Defining schemas with metadata fields
|
||||
- Listing and inspecting datasets
|
||||
- Dataset information and statistics
|
||||
|
||||
2. **Embedding Operations**
|
||||
- Text embedding generation via external services
|
||||
- Multimodal embedding support (text + images)
|
||||
- Batch embedding operations
|
||||
|
||||
3. **Data Storage**
|
||||
- Storing text documents with automatic embedding
|
||||
- Storing images with metadata
|
||||
- Multimodal content storage
|
||||
- Rich metadata support
|
||||
|
||||
4. **Vector Search**
|
||||
- Similarity search with raw vectors
|
||||
- Text-based semantic search
|
||||
- Configurable search parameters (K, NPROBES, REFINE)
|
||||
- Cross-modal search capabilities
|
||||
|
||||
5. **Index Management**
|
||||
- Creating IVF_PQ indexes for performance
|
||||
- Custom index parameters
|
||||
- Performance optimization
|
||||
|
||||
6. **Advanced Features**
|
||||
- Error handling and recovery
|
||||
- Performance testing concepts
|
||||
- Monitoring and maintenance
|
||||
- Cleanup operations
|
||||
|
||||
### Key Lance Commands Demonstrated
|
||||
|
||||
#### Dataset Management
|
||||
```bash
|
||||
# Create vector dataset
|
||||
LANCE CREATE documents DIM 384
|
||||
|
||||
# Create dataset with schema
|
||||
LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool
|
||||
|
||||
# List datasets
|
||||
LANCE LIST
|
||||
|
||||
# Get dataset information
|
||||
LANCE INFO documents
|
||||
```
|
||||
|
||||
#### Data Operations
|
||||
```bash
|
||||
# Store text with metadata
|
||||
LANCE STORE documents TEXT "Machine learning tutorial" category "education" author "John Doe"
|
||||
|
||||
# Store image with metadata
|
||||
LANCE STORE images IMAGE "base64_encoded_image..." filename "photo.jpg" tags "nature,landscape"
|
||||
|
||||
# Store multimodal content
|
||||
LANCE STORE content TEXT "Product description" IMAGE "base64_image..." type "product"
|
||||
```
|
||||
|
||||
#### Search Operations
|
||||
```bash
|
||||
# Search with raw vector
|
||||
LANCE SEARCH documents VECTOR "0.1,0.2,0.3,0.4" K 5
|
||||
|
||||
# Semantic text search
|
||||
LANCE SEARCH.TEXT documents "artificial intelligence" K 10 NPROBES 20
|
||||
|
||||
# Generate embeddings
|
||||
LANCE EMBED.TEXT "Hello world" "Machine learning"
|
||||
```
|
||||
|
||||
#### Index Management
|
||||
```bash
|
||||
# Create performance index
|
||||
LANCE CREATE.INDEX documents IVF_PQ PARTITIONS 256 SUBVECTORS 16
|
||||
|
||||
# Drop dataset
|
||||
LANCE DROP old_dataset
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
#### Setting Up Embedding Service
|
||||
```bash
|
||||
# Configure embedding service URL
|
||||
redis-cli HSET config:core:aiembed url "http://your-embedding-service:8080/embed"
|
||||
|
||||
# Optional: Set authentication token
|
||||
redis-cli HSET config:core:aiembed token "your-api-token"
|
||||
```
|
||||
|
||||
#### Embedding Service API
|
||||
Your embedding service should accept POST requests:
|
||||
```json
|
||||
{
|
||||
"texts": ["text1", "text2"],
|
||||
"images": ["base64_image1", "base64_image2"],
|
||||
"model": "your-model-name"
|
||||
}
|
||||
```
|
||||
|
||||
And return responses:
|
||||
```json
|
||||
{
|
||||
"embeddings": [[0.1, 0.2, ...], [0.3, 0.4, ...]],
|
||||
"model": "model-name",
|
||||
"usage": {"tokens": 100, "requests": 2}
|
||||
}
|
||||
```
|
||||
|
||||
### Interactive Features
|
||||
|
||||
The demo script includes:
|
||||
- **Colored output** for better readability
|
||||
- **Step-by-step execution** with explanations
|
||||
- **Error handling** demonstrations
|
||||
- **Automatic cleanup** options
|
||||
- **Performance testing** concepts
|
||||
- **Real-world usage** examples
|
||||
|
||||
### Use Cases Demonstrated
|
||||
|
||||
1. **Document Search System**
|
||||
- Semantic document retrieval
|
||||
- Metadata filtering
|
||||
- Relevance ranking
|
||||
|
||||
2. **Image Similarity Search**
|
||||
- Visual content matching
|
||||
- Tag-based filtering
|
||||
- Multimodal queries
|
||||
|
||||
3. **Product Recommendations**
|
||||
- Feature-based similarity
|
||||
- Category filtering
|
||||
- Price range queries
|
||||
|
||||
4. **Content Management**
|
||||
- Mixed media storage
|
||||
- Cross-modal search
|
||||
- Rich metadata support
|
||||
|
||||
---
|
||||
|
||||
## Tantivy Search Demo (Bash Script)
|
||||
|
||||
|
426
examples/lance_vector_demo.sh
Executable file
426
examples/lance_vector_demo.sh
Executable file
@@ -0,0 +1,426 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Lance Vector Database Demo Script
|
||||
# This script demonstrates all Lance vector database operations in HeroDB
|
||||
|
||||
set -e # Exit on any error
|
||||
|
||||
# Configuration
|
||||
# Connection settings. Each value can be overridden from the environment,
# e.g. `REDIS_PORT=6380 ./examples/lance_vector_demo.sh`; the defaults are
# the values the script previously hard-coded.
REDIS_HOST="${REDIS_HOST:-localhost}"
REDIS_PORT="${REDIS_PORT:-6379}"
# Kept as a plain string because the rest of the script expands it unquoted
# ($REDIS_CLI ...) and embeds it inside command strings.
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"

# Colors for output (stored as escape sequences; expanded when printed)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
|
||||
|
||||
# Helper functions
|
||||
# Print an informational message prefixed with a blue [INFO] tag.
#   $1 - message text, printed verbatim
log_info() {
    # %b expands the escape sequences held in the colour variables, while %s
    # emits the message untouched, so backslashes in the message (for example
    # in a server reply) are not reinterpreted the way `echo -e` would.
    printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"
}
|
||||
|
||||
# Print a success message prefixed with a green [SUCCESS] tag.
#   $1 - message text, printed verbatim
log_success() {
    # %b expands the colour escape sequences; %s keeps the message literal so
    # command output containing backslashes is shown exactly as received.
    printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "$1"
}
|
||||
|
||||
# Print a warning message prefixed with a yellow [WARNING] tag.
#   $1 - message text, printed verbatim
log_warning() {
    # %b expands the colour escape sequences; %s keeps the message literal.
    printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "$1"
}
|
||||
|
||||
# Print an error message prefixed with a red [ERROR] tag.
#   $1 - message text, printed verbatim
log_error() {
    # %b expands the colour escape sequences; %s keeps the message literal so
    # error text containing backslashes is not mangled.
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1"
}
|
||||
|
||||
# Run one command line, echo it, and report the outcome.
#   $1 - the full command line as a single string. Shell quoting inside the
#        string (e.g. 'Hello world') is honoured, so pass only trusted,
#        script-authored text.
#   $2 - human-readable description printed before the command runs
# Returns 0 when the command exits successfully, 1 otherwise.
execute_command() {
    local cmd="$1"
    local description="$2"
    # Declared separately from the assignment below so that `local` cannot
    # mask the command's exit status, and so the variable does not leak into
    # the caller's scope.
    local result

    echo
    log_info "Executing: $description"
    echo "Command: $cmd"

    # A bare $cmd is only word-split: quotes embedded in the string are passed
    # through as literal characters and quoted arguments containing spaces are
    # broken apart. eval re-parses the string so the quoting takes effect.
    if result=$(eval "$cmd" 2>&1); then
        log_success "Result: $result"
    else
        log_error "Failed: $result"
        return 1
    fi
}
|
||||
|
||||
# Check if HeroDB is running
|
||||
# Verify that the server reachable through $REDIS_CLI answers PING.
# On success a confirmation is logged; otherwise a start-up hint is printed
# and the script exits with status 1.
check_herodb() {
    log_info "Checking if HeroDB is running..."

    if $REDIS_CLI ping &> /dev/null; then
        log_success "HeroDB is running"
        return
    fi

    log_error "HeroDB is not running. Please start it first:"
    echo " cargo run -- --dir ./test_data --port $REDIS_PORT"
    exit 1
}
|
||||
|
||||
# Setup embedding service configuration
|
||||
# Point HeroDB's embedding configuration at a placeholder service URL and
# warn that nothing is listening there.
setup_embedding_service() {
    # Note: This is a mock URL for demonstration
    # In production, replace with your actual embedding service
    local embed_url_cmd="$REDIS_CLI HSET config:core:aiembed url 'http://localhost:8080/embed'"

    log_info "Setting up embedding service configuration..."
    execute_command "$embed_url_cmd" "Configure embedding service URL"

    # Optional: Set authentication token
    # execute_command \
    #     "$REDIS_CLI HSET config:core:aiembed token 'your-api-token'" \
    #     "Configure embedding service token"

    log_warning "Note: Embedding service at http://localhost:8080/embed is not running."
    log_warning "Some operations will fail, but this demonstrates the command structure."
}
|
||||
|
||||
# Dataset Management Operations
|
||||
# Dataset management walkthrough: list datasets, create the documents,
# products and images datasets, list again, then show INFO for two of them.
demo_dataset_management() {
    printf '%s\n' \
        "" \
        "==========================================" \
        " DATASET MANAGEMENT DEMO" \
        "=========================================="

    # Each entry is "<command>|<description>"; entries run in order.
    local -a steps=(
        "$REDIS_CLI LANCE LIST|List all datasets (initially empty)"
        "$REDIS_CLI LANCE CREATE documents DIM 384|Create a simple document dataset with 384 dimensions"
        "$REDIS_CLI LANCE CREATE products DIM 768 SCHEMA category:string price:float available:bool description:string|Create products dataset with custom schema"
        "$REDIS_CLI LANCE CREATE images DIM 512 SCHEMA filename:string tags:string width:int height:int|Create images dataset for multimodal content"
        "$REDIS_CLI LANCE LIST|List all datasets (should show 3 datasets)"
        "$REDIS_CLI LANCE INFO documents|Get information about documents dataset"
        "$REDIS_CLI LANCE INFO products|Get information about products dataset"
    )

    local step
    for step in "${steps[@]}"; do
        execute_command "${step%%|*}" "${step#*|}"
    done
}
|
||||
|
||||
# Embedding Operations
|
||||
# Embedding walkthrough: issue LANCE EMBED.TEXT for one text and for several
# texts. Failures are tolerated so the demo keeps going.
demo_embedding_operations() {
    printf '%s\n' \
        "" \
        "==========================================" \
        " EMBEDDING OPERATIONS DEMO" \
        "=========================================="

    log_warning "The following operations will fail because no embedding service is running."
    log_warning "This demonstrates the command structure and error handling."

    # Each entry is "<command>|<description>".
    local -a attempts=(
        "$REDIS_CLI LANCE EMBED.TEXT 'Hello world'|Generate embedding for single text"
        "$REDIS_CLI LANCE EMBED.TEXT 'Machine learning' 'Artificial intelligence' 'Deep learning'|Generate embeddings for multiple texts"
    )

    local attempt
    for attempt in "${attempts[@]}"; do
        execute_command "${attempt%%|*}" "${attempt#*|}" || true
    done
}
|
||||
|
||||
# Data Storage Operations
|
||||
# Storage walkthrough: LANCE STORE for text documents, a product, an image and
# a combined text + image item. Failures are tolerated so the demo keeps going.
demo_data_storage() {
    # Placeholder base64 payload (labelled a 1x1 pixel PNG by the demo) shared
    # by both image examples.
    local pixel_png='iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=='

    printf '%s\n' \
        "" \
        "==========================================" \
        " DATA STORAGE DEMO" \
        "=========================================="

    log_warning "Storage operations will fail without embedding service, but show command structure."

    # Each entry is "<command>|<description>".
    local -a items=(
        "$REDIS_CLI LANCE STORE documents TEXT 'Introduction to machine learning algorithms and their applications in modern AI systems' category 'education' author 'John Doe' difficulty 'beginner'|Store a document with text and metadata"
        "$REDIS_CLI LANCE STORE documents TEXT 'Deep learning neural networks for computer vision tasks' category 'research' author 'Jane Smith' difficulty 'advanced'|Store another document"
        "$REDIS_CLI LANCE STORE products TEXT 'High-performance laptop with 16GB RAM and SSD storage' category 'electronics' price '1299.99' available 'true'|Store product with text description"
        "$REDIS_CLI LANCE STORE images IMAGE '$pixel_png' filename 'sample.png' tags 'test,demo' width '1' height '1'|Store image with metadata (1x1 pixel PNG)"
        "$REDIS_CLI LANCE STORE images TEXT 'Beautiful sunset over mountains' IMAGE '$pixel_png' filename 'sunset.png' tags 'nature,landscape' location 'California'|Store multimodal content (text + image)"
    )

    local item
    for item in "${items[@]}"; do
        execute_command "${item%%|*}" "${item#*|}" || true
    done
}
|
||||
|
||||
# Search Operations
|
||||
# Search walkthrough: raw-vector LANCE SEARCH and text-based LANCE SEARCH.TEXT
# against the demo datasets. Failures are tolerated so the demo keeps going.
demo_search_operations() {
    printf '%s\n' \
        "" \
        "==========================================" \
        " SEARCH OPERATIONS DEMO" \
        "=========================================="

    log_warning "Search operations will fail without data, but show command structure."

    # Each entry is "<command>|<description>".
    local -a queries=(
        "$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 5|Search with raw vector (5 results)"
        "$REDIS_CLI LANCE SEARCH documents VECTOR '0.1,0.2,0.3,0.4,0.5' K 10 NPROBES 20 REFINE 2|Search with vector and advanced parameters"
        "$REDIS_CLI LANCE SEARCH.TEXT documents 'machine learning algorithms' K 5|Search using text query"
        "$REDIS_CLI LANCE SEARCH.TEXT products 'laptop computer' K 3 NPROBES 10|Search products using text with parameters"
        "$REDIS_CLI LANCE SEARCH.TEXT images 'sunset landscape' K 5|Search images using text description"
    )

    local query
    for query in "${queries[@]}"; do
        execute_command "${query%%|*}" "${query#*|}" || true
    done
}
|
||||
|
||||
# Index Management Operations
|
||||
# Index walkthrough: request an IVF_PQ index on each demo dataset, first with
# default settings and then with explicit PARTITIONS / SUBVECTORS values.
demo_index_management() {
    printf '%s\n' \
        "" \
        "==========================================" \
        " INDEX MANAGEMENT DEMO" \
        "=========================================="

    # Create indexes for better search performance.
    # Each entry is "<command>|<description>".
    local -a index_jobs=(
        "$REDIS_CLI LANCE CREATE.INDEX documents IVF_PQ|Create default IVF_PQ index for documents"
        "$REDIS_CLI LANCE CREATE.INDEX products IVF_PQ PARTITIONS 512 SUBVECTORS 32|Create IVF_PQ index with custom parameters for products"
        "$REDIS_CLI LANCE CREATE.INDEX images IVF_PQ PARTITIONS 256 SUBVECTORS 16|Create IVF_PQ index for images dataset"
    )

    local job
    for job in "${index_jobs[@]}"; do
        execute_command "${job%%|*}" "${job#*|}"
    done

    log_success "Indexes created successfully"
}
|
||||
|
||||
# Advanced Usage Examples
|
||||
# Advanced walkthrough: create a semantic_search dataset, print a batch-store
# sketch, then list the datasets and request INFO for each demo dataset.
demo_advanced_usage() {
    printf '%s\n' \
        "" \
        "==========================================" \
        " ADVANCED USAGE EXAMPLES" \
        "=========================================="

    # Create a specialized dataset for semantic search
    local create_cmd="$REDIS_CLI LANCE CREATE semantic_search DIM 1536 SCHEMA title:string content:string url:string timestamp:string source:string"
    execute_command "$create_cmd" "Create dataset for semantic search with rich metadata"

    # Batch operations are only sketched as text; nothing is stored here.
    log_info "Batch operations example (would store multiple items):"
    echo ' for doc in documents:'
    echo ' LANCE STORE semantic_search TEXT "$doc_content" title "$title" url "$url"'

    # Show monitoring commands
    log_info "Monitoring and maintenance commands:"
    execute_command "$REDIS_CLI LANCE LIST" "List all datasets for monitoring"

    # Show dataset statistics; a failing INFO does not stop the loop.
    local -a tracked=(documents products images semantic_search)
    for dataset in "${tracked[@]}"; do
        execute_command "$REDIS_CLI LANCE INFO $dataset" "Get statistics for $dataset" || true
    done
}
|
||||
|
||||
# Cleanup Operations
|
||||
# Cleanup walkthrough: drop the semantic_search dataset, list what remains,
# then optionally drop the remaining demo datasets after asking the user.
# Anything other than a single 'y' or 'Y' (including no input at all) keeps
# the datasets.
demo_cleanup() {
    echo
    echo "=========================================="
    echo " CLEANUP OPERATIONS DEMO"
    echo "=========================================="

    log_info "Demonstrating cleanup operations..."

    # Drop individual datasets
    execute_command \
        "$REDIS_CLI LANCE DROP semantic_search" \
        "Drop semantic_search dataset"

    # List remaining datasets
    execute_command \
        "$REDIS_CLI LANCE LIST" \
        "List remaining datasets"

    # Ask user if they want to clean up all test data
    echo
    # read returns non-zero at end-of-input (e.g. stdin is /dev/null or a
    # closed pipe); under `set -e` that would abort the whole script, so an
    # unanswered prompt is treated as the default "No".
    read -p "Do you want to clean up all test datasets? (y/N): " -n 1 -r || REPLY=""
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        local dataset
        for dataset in documents products images; do
            execute_command \
                "$REDIS_CLI LANCE DROP $dataset" \
                "Drop $dataset dataset"
        done

        execute_command \
            "$REDIS_CLI LANCE LIST" \
            "Verify all datasets are cleaned up"

        log_success "All test datasets cleaned up"
    else
        log_info "Keeping test datasets for further experimentation"
    fi
}
|
||||
|
||||
# Error Handling Demo
#
# Issues a series of commands that are expected to fail (unknown dataset,
# malformed vector) so the error replies can be inspected. Every probe is
# followed by `|| true` so a failing command never stops the script.
demo_error_handling() {
    printf '\n'
    printf '%s\n' \
        "==========================================" \
        " ERROR HANDLING DEMO" \
        "=========================================="

    log_info "Demonstrating various error conditions..."

    # Flat list of (command, description) pairs, executed in order
    local -a error_probes=(
        "$REDIS_CLI LANCE INFO nonexistent_dataset"
        "Try to get info for non-existent dataset"
        "$REDIS_CLI LANCE SEARCH nonexistent_dataset VECTOR '0.1,0.2' K 5"
        "Try to search non-existent dataset"
        "$REDIS_CLI LANCE DROP nonexistent_dataset"
        "Try to drop non-existent dataset"
        "$REDIS_CLI LANCE SEARCH documents VECTOR 'invalid,vector,format' K 5"
        "Try search with invalid vector format"
    )

    local probe_index
    for ((probe_index = 0; probe_index < ${#error_probes[@]}; probe_index += 2)); do
        execute_command "${error_probes[probe_index]}" "${error_probes[probe_index + 1]}" || true
    done

    log_info "Error handling demonstration complete"
}
|
||||
|
||||
# Performance Testing Demo
#
# Creates a throwaway perf_test dataset, prints what a real performance test
# would cover along with example timing commands (printed only, not run),
# and finally drops the dataset again.
demo_performance_testing() {
    printf '\n'
    printf '%s\n' \
        "==========================================" \
        " PERFORMANCE TESTING DEMO" \
        "=========================================="

    log_info "Creating performance test dataset..."
    execute_command "$REDIS_CLI LANCE CREATE perf_test DIM 128 SCHEMA batch_id:string item_id:string" "Create performance test dataset"

    log_info "Performance testing would involve:"
    printf '%s\n' \
        " 1. Bulk loading thousands of vectors" \
        " 2. Creating indexes with different parameters" \
        " 3. Measuring search latency with various K values" \
        " 4. Testing different NPROBES settings" \
        " 5. Monitoring memory usage"

    log_info "Example performance test commands:"
    printf '%s\n' \
        " # Test search speed with different parameters" \
        " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10" \
        " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 10 NPROBES 50" \
        " time redis-cli LANCE SEARCH.TEXT perf_test 'query' K 100 NPROBES 100"

    # Remove the scratch dataset so later demos start clean
    execute_command "$REDIS_CLI LANCE DROP perf_test" "Clean up performance test dataset"
}
|
||||
|
||||
# Main execution
#
# Prints the intro banner, verifies prerequisites, configures the embedding
# service, runs every demo section in order, performs cleanup, and closes
# with a summary of suggested next steps.
main() {
    printf '%s\n' \
        "==========================================" \
        " LANCE VECTOR DATABASE DEMO SCRIPT" \
        "==========================================" \
        "" \
        "This script demonstrates all Lance vector database operations." \
        "Note: Some operations will fail without a running embedding service." \
        "This is expected and demonstrates error handling." \
        ""

    # Prerequisites, then embedding-service setup
    check_herodb
    setup_embedding_service

    # Demo sections, in presentation order
    local demo_step
    for demo_step in \
        demo_dataset_management \
        demo_embedding_operations \
        demo_data_storage \
        demo_search_operations \
        demo_index_management \
        demo_advanced_usage \
        demo_error_handling \
        demo_performance_testing; do
        "$demo_step"
    done

    # Cleanup runs last so the earlier sections can rely on the datasets
    demo_cleanup

    printf '%s\n' \
        "" \
        "==========================================" \
        " DEMO COMPLETE" \
        "==========================================" \
        ""
    log_success "Lance vector database demo completed successfully!"
    printf '%s\n' \
        "" \
        "Next steps:" \
        "1. Set up a real embedding service (OpenAI, Hugging Face, etc.)" \
        "2. Update the embedding service URL configuration" \
        "3. Try storing and searching real data" \
        "4. Experiment with different vector dimensions and index parameters" \
        "5. Build your AI-powered application!" \
        "" \
        "For more information, see docs/lance_vector_db.md"
}
|
||||
|
||||
# Run the demo
|
||||
main "$@"
|
165
src/cmd.rs
165
src/cmd.rs
@@ -1,6 +1,8 @@
|
||||
use crate::{error::DBError, protocol::Protocol, server::Server};
|
||||
use tokio::time::{timeout, Duration};
|
||||
use futures::future::select_all;
|
||||
use std::sync::Arc;
|
||||
use base64::Engine;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Cmd {
|
||||
@@ -1006,11 +1008,11 @@ impl Cmd {
|
||||
Cmd::AgeList => Ok(crate::age::cmd_age_list(server).await),
|
||||
|
||||
// Lance vector database commands
|
||||
Cmd::LanceCreate { dataset, dim, schema } => lance_create_cmd(server, &dataset, *dim, &schema).await,
|
||||
Cmd::LanceStore { dataset, text, image_base64, metadata } => lance_store_cmd(server, &dataset, text.as_deref(), image_base64.as_deref(), metadata).await,
|
||||
Cmd::LanceSearch { dataset, vector, k, nprobes, refine_factor } => lance_search_cmd(server, &dataset, vector, *k, nprobes, refine_factor).await,
|
||||
Cmd::LanceSearchText { dataset, query_text, k, nprobes, refine_factor } => lance_search_text_cmd(server, &dataset, &query_text, *k, nprobes, refine_factor).await,
|
||||
Cmd::LanceEmbedText { texts } => lance_embed_text_cmd(server, texts).await,
|
||||
Cmd::LanceCreate { dataset, dim, schema } => lance_create_cmd(server, &dataset, dim, &schema).await,
|
||||
Cmd::LanceStore { dataset, text, image_base64, metadata } => lance_store_cmd(server, &dataset, text.as_deref(), image_base64.as_deref(), &metadata).await,
|
||||
Cmd::LanceSearch { dataset, vector, k, nprobes, refine_factor } => lance_search_cmd(server, &dataset, &vector, k, nprobes, refine_factor).await,
|
||||
Cmd::LanceSearchText { dataset, query_text, k, nprobes, refine_factor } => lance_search_text_cmd(server, &dataset, &query_text, k, nprobes, refine_factor).await,
|
||||
Cmd::LanceEmbedText { texts } => lance_embed_text_cmd(server, &texts).await,
|
||||
Cmd::LanceCreateIndex { dataset, index_type, num_partitions, num_sub_vectors } => lance_create_index_cmd(server, &dataset, &index_type, num_partitions, num_sub_vectors).await,
|
||||
Cmd::LanceList => lance_list_cmd(server).await,
|
||||
Cmd::LanceDrop { dataset } => lance_drop_cmd(server, &dataset).await,
|
||||
@@ -1800,6 +1802,36 @@ fn command_cmd(args: &[String]) -> Result<Protocol, DBError> {
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to create Arrow schema from field specifications
|
||||
fn create_schema_from_fields(dim: usize, fields: &[(String, String)]) -> arrow::datatypes::Schema {
|
||||
let mut schema_fields = Vec::new();
|
||||
|
||||
// Always add the vector field first
|
||||
let vector_field = arrow::datatypes::Field::new(
|
||||
"vector",
|
||||
arrow::datatypes::DataType::FixedSizeList(
|
||||
Arc::new(arrow::datatypes::Field::new("item", arrow::datatypes::DataType::Float32, true)),
|
||||
dim as i32
|
||||
),
|
||||
false
|
||||
);
|
||||
schema_fields.push(vector_field);
|
||||
|
||||
// Add custom fields
|
||||
for (name, field_type) in fields {
|
||||
let data_type = match field_type.to_lowercase().as_str() {
|
||||
"string" | "text" => arrow::datatypes::DataType::Utf8,
|
||||
"int" | "integer" => arrow::datatypes::DataType::Int64,
|
||||
"float" => arrow::datatypes::DataType::Float64,
|
||||
"bool" | "boolean" => arrow::datatypes::DataType::Boolean,
|
||||
_ => arrow::datatypes::DataType::Utf8, // Default to string
|
||||
};
|
||||
schema_fields.push(arrow::datatypes::Field::new(name, data_type, true));
|
||||
}
|
||||
|
||||
arrow::datatypes::Schema::new(schema_fields)
|
||||
}
|
||||
|
||||
// Lance vector database command implementations
|
||||
async fn lance_create_cmd(
|
||||
server: &Server,
|
||||
@@ -1809,12 +1841,12 @@ async fn lance_create_cmd(
|
||||
) -> Result<Protocol, DBError> {
|
||||
match server.lance_store() {
|
||||
Ok(lance_store) => {
|
||||
match lance_store.create_dataset(dataset, dim, schema.to_vec()).await {
|
||||
match lance_store.create_dataset(dataset, create_schema_from_fields(dim, schema)).await {
|
||||
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR Lance store not available: {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1827,12 +1859,14 @@ async fn lance_store_cmd(
|
||||
) -> Result<Protocol, DBError> {
|
||||
match server.lance_store() {
|
||||
Ok(lance_store) => {
|
||||
match lance_store.store_data(dataset, text, image_base64, metadata.clone()).await {
|
||||
match lance_store.store_multimodal(server, dataset, text.map(|s| s.to_string()),
|
||||
image_base64.and_then(|s| base64::engine::general_purpose::STANDARD.decode(s).ok()),
|
||||
metadata.clone()).await {
|
||||
Ok(id) => Ok(Protocol::BulkString(id)),
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR Lance store not available: {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1846,24 +1880,14 @@ async fn lance_search_cmd(
|
||||
) -> Result<Protocol, DBError> {
|
||||
match server.lance_store() {
|
||||
Ok(lance_store) => {
|
||||
match lance_store.search_vector(dataset, vector, k, nprobes, refine_factor).await {
|
||||
match lance_store.search_vectors(dataset, vector.to_vec(), k, nprobes, refine_factor).await {
|
||||
Ok(results) => {
|
||||
let mut response = Vec::new();
|
||||
for result in results {
|
||||
for (distance, metadata) in results {
|
||||
let mut item = Vec::new();
|
||||
item.push(Protocol::BulkString("id".to_string()));
|
||||
item.push(Protocol::BulkString(result.id));
|
||||
item.push(Protocol::BulkString("score".to_string()));
|
||||
item.push(Protocol::BulkString(result.score.to_string()));
|
||||
if let Some(text) = result.text {
|
||||
item.push(Protocol::BulkString("text".to_string()));
|
||||
item.push(Protocol::BulkString(text));
|
||||
}
|
||||
if let Some(image) = result.image_base64 {
|
||||
item.push(Protocol::BulkString("image".to_string()));
|
||||
item.push(Protocol::BulkString(image));
|
||||
}
|
||||
for (key, value) in result.metadata {
|
||||
item.push(Protocol::BulkString("distance".to_string()));
|
||||
item.push(Protocol::BulkString(distance.to_string()));
|
||||
for (key, value) in metadata {
|
||||
item.push(Protocol::BulkString(key));
|
||||
item.push(Protocol::BulkString(value));
|
||||
}
|
||||
@@ -1871,10 +1895,10 @@ async fn lance_search_cmd(
|
||||
}
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR Lance store not available: {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1888,24 +1912,14 @@ async fn lance_search_text_cmd(
|
||||
) -> Result<Protocol, DBError> {
|
||||
match server.lance_store() {
|
||||
Ok(lance_store) => {
|
||||
match lance_store.search_text(dataset, query_text, k, nprobes, refine_factor).await {
|
||||
match lance_store.search_with_text(server, dataset, query_text.to_string(), k, nprobes, refine_factor).await {
|
||||
Ok(results) => {
|
||||
let mut response = Vec::new();
|
||||
for result in results {
|
||||
for (distance, metadata) in results {
|
||||
let mut item = Vec::new();
|
||||
item.push(Protocol::BulkString("id".to_string()));
|
||||
item.push(Protocol::BulkString(result.id));
|
||||
item.push(Protocol::BulkString("score".to_string()));
|
||||
item.push(Protocol::BulkString(result.score.to_string()));
|
||||
if let Some(text) = result.text {
|
||||
item.push(Protocol::BulkString("text".to_string()));
|
||||
item.push(Protocol::BulkString(text));
|
||||
}
|
||||
if let Some(image) = result.image_base64 {
|
||||
item.push(Protocol::BulkString("image".to_string()));
|
||||
item.push(Protocol::BulkString(image));
|
||||
}
|
||||
for (key, value) in result.metadata {
|
||||
item.push(Protocol::BulkString("distance".to_string()));
|
||||
item.push(Protocol::BulkString(distance.to_string()));
|
||||
for (key, value) in metadata {
|
||||
item.push(Protocol::BulkString(key));
|
||||
item.push(Protocol::BulkString(value));
|
||||
}
|
||||
@@ -1913,10 +1927,26 @@ async fn lance_search_text_cmd(
|
||||
}
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR Lance store not available: {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Makes an error message safe to send as a single-line Redis error reply.
///
/// Newlines, carriage returns and tabs are each replaced with a single space.
/// Messages longer than 200 bytes are shortened to at most 197 bytes followed
/// by `...`. The cut position is moved back to the nearest UTF-8 character
/// boundary, so messages containing multi-byte characters are truncated
/// cleanly instead of panicking on a mid-character slice.
fn sanitize_error_message(msg: &str) -> String {
    const MAX_LEN: usize = 200;
    const ELLIPSIS: &str = "...";

    // Single pass instead of three chained `replace` allocations.
    let mut sanitized: String = msg
        .chars()
        .map(|c| if matches!(c, '\n' | '\r' | '\t') { ' ' } else { c })
        .collect();

    if sanitized.len() <= MAX_LEN {
        return sanitized;
    }

    // Byte offset 0 is always a boundary, so this loop terminates.
    let mut cut = MAX_LEN - ELLIPSIS.len();
    while !sanitized.is_char_boundary(cut) {
        cut -= 1;
    }

    sanitized.truncate(cut);
    sanitized.push_str(ELLIPSIS);
    sanitized
}
|
||||
|
||||
@@ -1926,7 +1956,7 @@ async fn lance_embed_text_cmd(
|
||||
) -> Result<Protocol, DBError> {
|
||||
match server.lance_store() {
|
||||
Ok(lance_store) => {
|
||||
match lance_store.embed_texts(texts).await {
|
||||
match lance_store.embed_text(server, texts.to_vec()).await {
|
||||
Ok(embeddings) => {
|
||||
let mut response = Vec::new();
|
||||
for embedding in embeddings {
|
||||
@@ -1938,10 +1968,10 @@ async fn lance_embed_text_cmd(
|
||||
}
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR Lance store not available: {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1956,10 +1986,10 @@ async fn lance_create_index_cmd(
|
||||
Ok(lance_store) => {
|
||||
match lance_store.create_index(dataset, index_type, num_partitions, num_sub_vectors).await {
|
||||
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR Lance store not available: {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1974,10 +2004,10 @@ async fn lance_list_cmd(server: &Server) -> Result<Protocol, DBError> {
|
||||
.collect();
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR Lance store not available: {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1986,41 +2016,28 @@ async fn lance_drop_cmd(server: &Server, dataset: &str) -> Result<Protocol, DBEr
|
||||
Ok(lance_store) => {
|
||||
match lance_store.drop_dataset(dataset).await {
|
||||
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR Lance store not available: {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
|
||||
}
|
||||
}
|
||||
|
||||
async fn lance_info_cmd(server: &Server, dataset: &str) -> Result<Protocol, DBError> {
|
||||
match server.lance_store() {
|
||||
Ok(lance_store) => {
|
||||
match lance_store.dataset_info(dataset).await {
|
||||
match lance_store.get_dataset_info(dataset).await {
|
||||
Ok(info) => {
|
||||
let mut response = Vec::new();
|
||||
response.push(Protocol::BulkString("name".to_string()));
|
||||
response.push(Protocol::BulkString(info.name));
|
||||
response.push(Protocol::BulkString("dimension".to_string()));
|
||||
response.push(Protocol::BulkString(info.dimension.to_string()));
|
||||
response.push(Protocol::BulkString("num_rows".to_string()));
|
||||
response.push(Protocol::BulkString(info.num_rows.to_string()));
|
||||
response.push(Protocol::BulkString("schema".to_string()));
|
||||
let schema_items: Vec<Protocol> = info.schema
|
||||
.into_iter()
|
||||
.map(|(field, field_type)| {
|
||||
Protocol::Array(vec![
|
||||
Protocol::BulkString(field),
|
||||
Protocol::BulkString(field_type),
|
||||
])
|
||||
})
|
||||
.collect();
|
||||
response.push(Protocol::Array(schema_items));
|
||||
for (key, value) in info {
|
||||
response.push(Protocol::BulkString(key));
|
||||
response.push(Protocol::BulkString(value));
|
||||
}
|
||||
Ok(Protocol::Array(response))
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR {}", e)))),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(Protocol::err(&format!("ERR Lance store not available: {}", e))),
|
||||
Err(e) => Ok(Protocol::err(&sanitize_error_message(&format!("ERR Lance store not available: {}", e)))),
|
||||
}
|
||||
}
|
||||
|
@@ -3,9 +3,10 @@ use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use arrow::array::{Float32Array, StringArray, ArrayRef, FixedSizeListArray};
|
||||
use arrow::datatypes::{DataType, Field, Schema, FieldRef};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow::array::{Float32Array, StringArray, ArrayRef, FixedSizeListArray, Array};
|
||||
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
|
||||
use arrow::record_batch::{RecordBatch, RecordBatchReader};
|
||||
use arrow::error::ArrowError;
|
||||
use lance::dataset::{Dataset, WriteParams, WriteMode};
|
||||
use lance::index::vector::VectorIndexParams;
|
||||
use lance_index::vector::pq::PQBuildParams;
|
||||
@@ -13,10 +14,39 @@ use lance_index::vector::ivf::IvfBuildParams;
|
||||
use lance_index::DatasetIndexExt;
|
||||
use lance_linalg::distance::MetricType;
|
||||
use futures::TryStreamExt;
|
||||
use base64::Engine;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::error::DBError;
|
||||
use crate::protocol::Protocol;
|
||||
|
||||
// Simple RecordBatchReader implementation for Vec<RecordBatch>
|
||||
struct VecRecordBatchReader {
|
||||
batches: std::vec::IntoIter<Result<RecordBatch, ArrowError>>,
|
||||
}
|
||||
|
||||
impl VecRecordBatchReader {
|
||||
fn new(batches: Vec<RecordBatch>) -> Self {
|
||||
let result_batches = batches.into_iter().map(Ok).collect::<Vec<_>>();
|
||||
Self {
|
||||
batches: result_batches.into_iter(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for VecRecordBatchReader {
|
||||
type Item = Result<RecordBatch, ArrowError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.batches.next()
|
||||
}
|
||||
}
|
||||
|
||||
impl RecordBatchReader for VecRecordBatchReader {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
// This is a simplified implementation - in practice you'd want to store the schema
|
||||
Arc::new(Schema::empty())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct EmbeddingRequest {
|
||||
@@ -32,6 +62,18 @@ struct EmbeddingResponse {
|
||||
usage: Option<HashMap<String, u32>>,
|
||||
}
|
||||
|
||||
// Ollama-specific request/response structures
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct OllamaEmbeddingRequest {
|
||||
model: String,
|
||||
prompt: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct OllamaEmbeddingResponse {
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
pub struct LanceStore {
|
||||
datasets: Arc<RwLock<HashMap<String, Arc<Dataset>>>>,
|
||||
data_dir: PathBuf,
|
||||
@@ -56,64 +98,104 @@ impl LanceStore {
|
||||
})
|
||||
}
|
||||
|
||||
/// Get embedding service URL from Redis config
|
||||
/// Get embedding service URL from Redis config, default to local Ollama
|
||||
async fn get_embedding_url(&self, server: &crate::server::Server) -> Result<String, DBError> {
|
||||
// Get the embedding URL from Redis config
|
||||
let key = "config:core:aiembed:url";
|
||||
|
||||
// Use HGET to retrieve the URL from Redis hash
|
||||
let cmd = crate::cmd::Cmd::HGet(key.to_string(), "url".to_string());
|
||||
|
||||
// Execute command to get the config
|
||||
let result = cmd.run(&mut server.clone()).await?;
|
||||
|
||||
match result {
|
||||
Protocol::BulkString(url) => Ok(url),
|
||||
Protocol::SimpleString(url) => Ok(url),
|
||||
Protocol::Nil => Err(DBError(
|
||||
"Embedding service URL not configured. Set it with: HSET config:core:aiembed:url url <YOUR_EMBEDDING_SERVICE_URL>".to_string()
|
||||
)),
|
||||
_ => Err(DBError("Invalid embedding URL configuration".to_string())),
|
||||
// Get the embedding URL from Redis config directly from storage
|
||||
let storage = server.current_storage()?;
|
||||
match storage.hget("config:core:aiembed", "url")? {
|
||||
Some(url) => Ok(url),
|
||||
None => Ok("http://localhost:11434".to_string()), // Default to local Ollama
|
||||
}
|
||||
}
|
||||
|
||||
/// Call external embedding service
|
||||
/// Check if we're using Ollama (default) or custom embedding service
|
||||
async fn is_ollama_service(&self, server: &crate::server::Server) -> Result<bool, DBError> {
|
||||
let url = self.get_embedding_url(server).await?;
|
||||
Ok(url.contains("localhost:11434") || url.contains("127.0.0.1:11434"))
|
||||
}
|
||||
|
||||
/// Call external embedding service (Ollama or custom)
|
||||
async fn call_embedding_service(
|
||||
&self,
|
||||
server: &crate::server::Server,
|
||||
texts: Option<Vec<String>>,
|
||||
images: Option<Vec<String>>,
|
||||
) -> Result<Vec<Vec<f32>>, DBError> {
|
||||
let url = self.get_embedding_url(server).await?;
|
||||
let base_url = self.get_embedding_url(server).await?;
|
||||
let is_ollama = self.is_ollama_service(server).await?;
|
||||
|
||||
let request = EmbeddingRequest {
|
||||
texts,
|
||||
images,
|
||||
model: None, // Let the service use its default
|
||||
};
|
||||
|
||||
let response = self.http_client
|
||||
.post(&url)
|
||||
.json(&request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to call embedding service: {}", e)))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let error_text = response.text().await.unwrap_or_default();
|
||||
return Err(DBError(format!(
|
||||
"Embedding service returned error {}: {}",
|
||||
status, error_text
|
||||
)));
|
||||
if is_ollama {
|
||||
// Use Ollama API format
|
||||
if let Some(texts) = texts {
|
||||
let mut embeddings = Vec::new();
|
||||
for text in texts {
|
||||
let url = format!("{}/api/embeddings", base_url);
|
||||
let request = OllamaEmbeddingRequest {
|
||||
model: "nomic-embed-text".to_string(),
|
||||
prompt: text,
|
||||
};
|
||||
|
||||
let response = self.http_client
|
||||
.post(&url)
|
||||
.json(&request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to call Ollama embedding service: {}", e)))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let error_text = response.text().await.unwrap_or_default();
|
||||
return Err(DBError(format!(
|
||||
"Ollama embedding service returned error {}: {}",
|
||||
status, error_text
|
||||
)));
|
||||
}
|
||||
|
||||
let ollama_response: OllamaEmbeddingResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to parse Ollama embedding response: {}", e)))?;
|
||||
|
||||
embeddings.push(ollama_response.embedding);
|
||||
}
|
||||
Ok(embeddings)
|
||||
} else if let Some(_images) = images {
|
||||
// Ollama doesn't support image embeddings with this API yet
|
||||
Err(DBError("Image embeddings not supported with Ollama. Please configure a custom embedding service.".to_string()))
|
||||
} else {
|
||||
Err(DBError("No text or images provided for embedding".to_string()))
|
||||
}
|
||||
} else {
|
||||
// Use custom embedding service API format
|
||||
let request = EmbeddingRequest {
|
||||
texts,
|
||||
images,
|
||||
model: None, // Let the service use its default
|
||||
};
|
||||
|
||||
let response = self.http_client
|
||||
.post(&base_url)
|
||||
.json(&request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to call embedding service: {}", e)))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let error_text = response.text().await.unwrap_or_default();
|
||||
return Err(DBError(format!(
|
||||
"Embedding service returned error {}: {}",
|
||||
status, error_text
|
||||
)));
|
||||
}
|
||||
|
||||
let embedding_response: EmbeddingResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to parse embedding response: {}", e)))?;
|
||||
|
||||
Ok(embedding_response.embeddings)
|
||||
}
|
||||
|
||||
let embedding_response: EmbeddingResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to parse embedding response: {}", e)))?;
|
||||
|
||||
Ok(embedding_response.embeddings)
|
||||
}
|
||||
|
||||
pub async fn embed_text(
|
||||
@@ -162,10 +244,11 @@ impl LanceStore {
|
||||
|
||||
// Create an empty RecordBatch with the schema
|
||||
let empty_batch = RecordBatch::new_empty(Arc::new(schema));
|
||||
let batches = vec![empty_batch];
|
||||
|
||||
// Use RecordBatchReader for Lance 0.33
|
||||
let reader = VecRecordBatchReader::new(vec![empty_batch]);
|
||||
let dataset = Dataset::write(
|
||||
batches,
|
||||
reader,
|
||||
dataset_path.to_str().unwrap(),
|
||||
Some(write_params)
|
||||
).await
|
||||
@@ -186,7 +269,7 @@ impl LanceStore {
|
||||
let dataset_path = self.data_dir.join(format!("{}.lance", dataset_name));
|
||||
|
||||
// Open or get cached dataset
|
||||
let dataset = self.get_or_open_dataset(dataset_name).await?;
|
||||
let _dataset = self.get_or_open_dataset(dataset_name).await?;
|
||||
|
||||
// Build RecordBatch
|
||||
let num_vectors = vectors.len();
|
||||
@@ -200,10 +283,13 @@ impl LanceStore {
|
||||
|
||||
// Flatten vectors
|
||||
let flat_vectors: Vec<f32> = vectors.into_iter().flatten().collect();
|
||||
let vector_array = Float32Array::from(flat_vectors);
|
||||
let vector_array = arrow::array::FixedSizeListArray::try_new_from_values(
|
||||
vector_array,
|
||||
dim as i32
|
||||
let values_array = Float32Array::from(flat_vectors);
|
||||
let field = Arc::new(Field::new("item", DataType::Float32, true));
|
||||
let vector_array = FixedSizeListArray::try_new(
|
||||
field,
|
||||
dim as i32,
|
||||
Arc::new(values_array),
|
||||
None
|
||||
).map_err(|e| DBError(format!("Failed to create vector array: {}", e)))?;
|
||||
|
||||
let mut arrays: Vec<ArrayRef> = vec![Arc::new(vector_array)];
|
||||
@@ -241,8 +327,9 @@ impl LanceStore {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let reader = VecRecordBatchReader::new(vec![batch]);
|
||||
Dataset::write(
|
||||
vec![batch],
|
||||
reader,
|
||||
dataset_path.to_str().unwrap(),
|
||||
Some(write_params)
|
||||
).await
|
||||
@@ -261,25 +348,27 @@ impl LanceStore {
|
||||
query_vector: Vec<f32>,
|
||||
k: usize,
|
||||
nprobes: Option<usize>,
|
||||
refine_factor: Option<usize>,
|
||||
_refine_factor: Option<usize>,
|
||||
) -> Result<Vec<(f32, HashMap<String, String>)>, DBError> {
|
||||
let dataset = self.get_or_open_dataset(dataset_name).await?;
|
||||
|
||||
// Build query
|
||||
let query_array = Float32Array::from(query_vector.clone());
|
||||
let mut query = dataset.scan();
|
||||
query = query.nearest(
|
||||
query.nearest(
|
||||
"vector",
|
||||
&query_vector,
|
||||
&query_array,
|
||||
k,
|
||||
).map_err(|e| DBError(format!("Failed to build search query: {}", e)))?;
|
||||
|
||||
if let Some(nprobes) = nprobes {
|
||||
query = query.nprobes(nprobes);
|
||||
query.nprobs(nprobes);
|
||||
}
|
||||
|
||||
if let Some(refine) = refine_factor {
|
||||
query = query.refine_factor(refine);
|
||||
}
|
||||
// Note: refine_factor might not be available in this Lance version
|
||||
// if let Some(refine) = refine_factor {
|
||||
// query.refine_factor(refine);
|
||||
// }
|
||||
|
||||
// Execute search
|
||||
let results = query
|
||||
@@ -399,33 +488,41 @@ impl LanceStore {
|
||||
num_partitions: Option<usize>,
|
||||
num_sub_vectors: Option<usize>,
|
||||
) -> Result<(), DBError> {
|
||||
let dataset = self.get_or_open_dataset(dataset_name).await?;
|
||||
|
||||
let mut params = VectorIndexParams::default();
|
||||
let _dataset = self.get_or_open_dataset(dataset_name).await?;
|
||||
|
||||
match index_type.to_uppercase().as_str() {
|
||||
"IVF_PQ" => {
|
||||
params.ivf = IvfBuildParams {
|
||||
let ivf_params = IvfBuildParams {
|
||||
num_partitions: num_partitions.unwrap_or(256),
|
||||
..Default::default()
|
||||
};
|
||||
params.pq = PQBuildParams {
|
||||
let pq_params = PQBuildParams {
|
||||
num_sub_vectors: num_sub_vectors.unwrap_or(16),
|
||||
..Default::default()
|
||||
};
|
||||
let params = VectorIndexParams::with_ivf_pq_params(
|
||||
MetricType::L2,
|
||||
ivf_params,
|
||||
pq_params,
|
||||
);
|
||||
|
||||
// Get a mutable reference to the dataset
|
||||
let mut dataset_mut = Dataset::open(self.data_dir.join(format!("{}.lance", dataset_name)).to_str().unwrap())
|
||||
.await
|
||||
.map_err(|e| DBError(format!("Failed to open dataset for indexing: {}", e)))?;
|
||||
|
||||
dataset_mut.create_index(
|
||||
&["vector"],
|
||||
lance_index::IndexType::Vector,
|
||||
None,
|
||||
&params,
|
||||
true
|
||||
).await
|
||||
.map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
|
||||
}
|
||||
_ => return Err(DBError(format!("Unsupported index type: {}", index_type))),
|
||||
}
|
||||
|
||||
dataset.create_index(
|
||||
&["vector"],
|
||||
lance::index::IndexType::Vector,
|
||||
None,
|
||||
&params,
|
||||
true
|
||||
).await
|
||||
.map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -496,14 +593,14 @@ impl LanceStore {
|
||||
|
||||
let mut info = HashMap::new();
|
||||
info.insert("name".to_string(), name.to_string());
|
||||
info.insert("version".to_string(), dataset.version().to_string());
|
||||
info.insert("num_rows".to_string(), dataset.count_rows().await?.to_string());
|
||||
info.insert("version".to_string(), dataset.version().version.to_string());
|
||||
info.insert("num_rows".to_string(), dataset.count_rows(None).await?.to_string());
|
||||
|
||||
// Get schema info
|
||||
let schema = dataset.schema();
|
||||
let fields: Vec<String> = schema.fields()
|
||||
let fields: Vec<String> = schema.fields
|
||||
.iter()
|
||||
.map(|f| format!("{}:{}", f.name(), f.data_type()))
|
||||
.map(|f| format!("{}:{}", f.name, f.data_type()))
|
||||
.collect();
|
||||
info.insert("schema".to_string(), fields.join(", "));
|
||||
|
||||
|
Reference in New Issue
Block a user