3 Commits

Author SHA1 Message Date
Maxime Van Hees
22ac4c9ed6 implementation of tantivy datastore + updated RPC calls to deal with tantivy + docs 2025-09-23 17:15:40 +02:00
Maxime Van Hees
c470772a13 Merge branch 'management_rpc_server' 2025-09-22 16:26:53 +02:00
8331ed032b ... 2025-09-17 07:02:44 +02:00
14 changed files with 2657 additions and 12 deletions

741
Cargo.lock generated
View File

@@ -70,6 +70,21 @@ dependencies = [
"sha2",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "anstream"
version = "0.6.20"
@@ -224,6 +239,15 @@ version = "2.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d"
[[package]]
name = "bitpacking"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92"
dependencies = [
"crunchy",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
@@ -233,6 +257,37 @@ dependencies = [
"generic-array",
]
[[package]]
name = "bon"
version = "3.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2529c31017402be841eb45892278a6c21a000c0a17643af326c73a73f83f0fb"
dependencies = [
"bon-macros",
"rustversion",
]
[[package]]
name = "bon-macros"
version = "3.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82020dadcb845a345591863adb65d74fa8dc5c18a0b6d408470e13b7adc7005"
dependencies = [
"darling",
"ident_case",
"prettyplease",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.106",
]
[[package]]
name = "bumpalo"
version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "byteorder"
version = "1.5.0"
@@ -252,9 +307,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5252b3d2648e5eedbc1a6f501e3c795e07025c1e93bbf8bbdd6eef7f447a6d54"
dependencies = [
"find-msvc-tools",
"jobserver",
"libc",
"shlex",
]
[[package]]
name = "census"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0"
[[package]]
name = "cesu8"
version = "1.1.0"
@@ -411,6 +474,25 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
@@ -426,6 +508,12 @@ version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "crunchy"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "crypto-common"
version = "0.1.6"
@@ -464,6 +552,41 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "darling"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim 0.11.1",
"syn 2.0.106",
]
[[package]]
name = "darling_macro"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
dependencies = [
"darling_core",
"quote",
"syn 2.0.106",
]
[[package]]
name = "dashmap"
version = "5.5.3"
@@ -487,6 +610,16 @@ dependencies = [
"zeroize",
]
[[package]]
name = "deranged"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc"
dependencies = [
"powerfmt",
"serde",
]
[[package]]
name = "digest"
version = "0.10.7"
@@ -509,6 +642,12 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "downcast-rs"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc"
[[package]]
name = "ed25519"
version = "2.2.3"
@@ -533,12 +672,40 @@ dependencies = [
"zeroize",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "errno"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.61.0",
]
[[package]]
name = "fastdivide"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471"
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "fiat-crypto"
version = "0.2.9"
@@ -610,6 +777,12 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "form_urlencoded"
version = "1.2.2"
@@ -629,6 +802,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "fs4"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8640e34b88f7652208ce9e88b1a37a2ae95227d84abec377ccd3c5cfeb141ed4"
dependencies = [
"rustix",
"windows-sys 0.59.0",
]
[[package]]
name = "futures"
version = "0.3.31"
@@ -802,6 +985,11 @@ name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]]
name = "heck"
@@ -832,6 +1020,7 @@ dependencies = [
"serde_json",
"sha2",
"sled",
"tantivy",
"thiserror 1.0.69",
"tokio",
"x25519-dalek",
@@ -855,6 +1044,12 @@ dependencies = [
"digest",
]
[[package]]
name = "htmlescape"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
[[package]]
name = "http"
version = "1.3.1"
@@ -962,6 +1157,15 @@ dependencies = [
"tracing",
]
[[package]]
name = "hyperloglogplus"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3"
dependencies = [
"serde",
]
[[package]]
name = "i18n-config"
version = "0.4.8"
@@ -1117,6 +1321,12 @@ dependencies = [
"zerovec",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "1.1.0"
@@ -1208,6 +1418,15 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.15"
@@ -1236,6 +1455,26 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]]
name = "jobserver"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [
"getrandom 0.3.3",
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852f13bec5eba4ba9afbeb93fd7c13fe56147f055939ae21c43a29a0ecb2702e"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "jsonrpsee"
version = "0.26.0"
@@ -1397,12 +1636,30 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "levenshtein_automata"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25"
[[package]]
name = "libc"
version = "0.2.175"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
[[package]]
name = "libm"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "linux-raw-sys"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
[[package]]
name = "litemap"
version = "0.8.0"
@@ -1425,12 +1682,45 @@ version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "lru"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38"
dependencies = [
"hashbrown 0.15.5",
]
[[package]]
name = "lz4_flex"
version = "0.11.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
[[package]]
name = "measure_time"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e"
dependencies = [
"log",
]
[[package]]
name = "memchr"
version = "2.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "memmap2"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7"
dependencies = [
"libc",
]
[[package]]
name = "minimal-lexical"
version = "0.2.1"
@@ -1457,6 +1747,12 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "murmurhash32"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b"
[[package]]
name = "nom"
version = "7.1.3"
@@ -1467,6 +1763,22 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "object"
version = "0.36.7"
@@ -1488,6 +1800,12 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
[[package]]
name = "oneshot"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea"
[[package]]
name = "opaque-debug"
version = "0.3.1"
@@ -1500,6 +1818,15 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
[[package]]
name = "ownedbytes"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "parking_lot"
version = "0.11.2"
@@ -1606,6 +1933,12 @@ dependencies = [
"spki",
]
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "poly1305"
version = "0.8.0"
@@ -1626,6 +1959,12 @@ dependencies = [
"zerovec",
]
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
@@ -1635,6 +1974,16 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "prettyplease"
version = "0.2.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
dependencies = [
"proc-macro2",
"syn 2.0.106",
]
[[package]]
name = "proc-macro-crate"
version = "3.3.0"
@@ -1751,6 +2100,36 @@ dependencies = [
"getrandom 0.3.3",
]
[[package]]
name = "rand_distr"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
dependencies = [
"num-traits",
"rand 0.8.5",
]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redb"
version = "2.6.2"
@@ -1799,6 +2178,35 @@ dependencies = [
"bitflags 2.9.3",
]
[[package]]
name = "regex"
version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
[[package]]
name = "ring"
version = "0.17.14"
@@ -1853,6 +2261,16 @@ dependencies = [
"walkdir",
]
[[package]]
name = "rust-stemmers"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
dependencies = [
"serde",
"serde_derive",
]
[[package]]
name = "rustc-demangle"
version = "0.1.26"
@@ -1880,6 +2298,19 @@ dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
dependencies = [
"bitflags 2.9.3",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.61.0",
]
[[package]]
name = "rustls"
version = "0.23.31"
@@ -1954,6 +2385,12 @@ dependencies = [
"untrusted",
]
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "ryu"
version = "1.0.20"
@@ -2141,6 +2578,15 @@ dependencies = [
"rand_core 0.6.4",
]
[[package]]
name = "sketches-ddsketch"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a"
dependencies = [
"serde",
]
[[package]]
name = "slab"
version = "0.4.11"
@@ -2277,6 +2723,165 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "tantivy"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "502915c7381c5cb2d2781503962610cb880ad8f1a0ca95df1bae645d5ebf2545"
dependencies = [
"aho-corasick",
"arc-swap",
"base64 0.22.1",
"bitpacking",
"bon",
"byteorder",
"census",
"crc32fast",
"crossbeam-channel",
"downcast-rs",
"fastdivide",
"fnv",
"fs4",
"htmlescape",
"hyperloglogplus",
"itertools",
"levenshtein_automata",
"log",
"lru",
"lz4_flex",
"measure_time",
"memmap2",
"once_cell",
"oneshot",
"rayon",
"regex",
"rust-stemmers",
"rustc-hash 2.1.1",
"serde",
"serde_json",
"sketches-ddsketch",
"smallvec",
"tantivy-bitpacker",
"tantivy-columnar",
"tantivy-common",
"tantivy-fst",
"tantivy-query-grammar",
"tantivy-stacker",
"tantivy-tokenizer-api",
"tempfile",
"thiserror 2.0.16",
"time",
"uuid",
"winapi",
]
[[package]]
name = "tantivy-bitpacker"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3b04eed5108d8283607da6710fe17a7663523440eaf7ea5a1a440d19a1448b6"
dependencies = [
"bitpacking",
]
[[package]]
name = "tantivy-columnar"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b628488ae936c83e92b5c4056833054ca56f76c0e616aee8339e24ac89119cd"
dependencies = [
"downcast-rs",
"fastdivide",
"itertools",
"serde",
"tantivy-bitpacker",
"tantivy-common",
"tantivy-sstable",
"tantivy-stacker",
]
[[package]]
name = "tantivy-common"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f880aa7cab0c063a47b62596d10991cdd0b6e0e0575d9c5eeb298b307a25de55"
dependencies = [
"async-trait",
"byteorder",
"ownedbytes",
"serde",
"time",
]
[[package]]
name = "tantivy-fst"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18"
dependencies = [
"byteorder",
"regex-syntax",
"utf8-ranges",
]
[[package]]
name = "tantivy-query-grammar"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "768fccdc84d60d86235d42d7e4c33acf43c418258ff5952abf07bd7837fcd26b"
dependencies = [
"nom",
"serde",
"serde_json",
]
[[package]]
name = "tantivy-sstable"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8292095d1a8a2c2b36380ec455f910ab52dde516af36321af332c93f20ab7d5"
dependencies = [
"futures-util",
"itertools",
"tantivy-bitpacker",
"tantivy-common",
"tantivy-fst",
"zstd",
]
[[package]]
name = "tantivy-stacker"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d38a379411169f0b3002c9cba61cdfe315f757e9d4f239c00c282497a0749d"
dependencies = [
"murmurhash32",
"rand_distr",
"tantivy-common",
]
[[package]]
name = "tantivy-tokenizer-api"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23024f6aeb25ceb1a0e27740c84bdb0fae52626737b7e9a9de6ad5aa25c7b038"
dependencies = [
"serde",
]
[[package]]
name = "tempfile"
version = "3.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
dependencies = [
"fastrand",
"getrandom 0.3.3",
"once_cell",
"rustix",
"windows-sys 0.61.0",
]
[[package]]
name = "thiserror"
version = "1.0.69"
@@ -2317,6 +2922,37 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "time"
version = "0.3.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
dependencies = [
"deranged",
"itoa",
"num-conv",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
name = "time-core"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
[[package]]
name = "time-macros"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "tinystr"
version = "0.8.1"
@@ -2550,6 +3186,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "utf8-ranges"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba"
[[package]]
name = "utf8_iter"
version = "1.0.4"
@@ -2562,6 +3204,18 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
dependencies = [
"getrandom 0.3.3",
"js-sys",
"serde",
"wasm-bindgen",
]
[[package]]
name = "version_check"
version = "0.9.5"
@@ -2611,6 +3265,65 @@ dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab10a69fbd0a177f5f649ad4d8d3305499c42bab9aef2f7ff592d0ec8f833819"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bb702423545a6007bbc368fde243ba47ca275e549c8a28617f56f6ba53b1d1c"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn 2.0.106",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc65f4f411d91494355917b605e1480033152658d71f722a90647f56a70c88a0"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffc003a991398a8ee604a401e194b6b3a39677b3173d6e74495eb51b82e99a32"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.106",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "293c37f4efa430ca14db3721dfbe48d8c33308096bd44d80ebaa775ab71ba1cf"
dependencies = [
"unicode-ident",
]
[[package]]
name = "webpki-root-certs"
version = "0.26.11"
@@ -3053,3 +3766,31 @@ dependencies = [
"quote",
"syn 2.0.106",
]
[[package]]
name = "zstd"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.16+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
dependencies = [
"cc",
"pkg-config",
]

View File

@@ -26,6 +26,7 @@ ed25519-dalek = "2"
x25519-dalek = "2"
base64 = "0.22"
jsonrpsee = { version = "0.26.0", features = ["http-client", "ws-client", "server", "macros"] }
tantivy = "0.25.0"
[dev-dependencies]
redis = { version = "0.24", features = ["aio", "tokio-comp"] }

253
docs/tantivy.md Normal file
View File

@@ -0,0 +1,253 @@
# Tantivy FullText Backend (JSONRPC)
This document explains how to use HeroDBs Tantivy-backed fulltext search as a dedicated database backend and provides copypasteable JSONRPC requests. Tantivy is available only for nonadmin databases (db_id >= 1). Admin DB 0 always uses Redb/Sled and rejects FT operations.
Important characteristics:
- Tantivy is a third backend alongside Redb and Sled. It provides search indexes only; there is no KV store backing it.
- On Tantivy databases, Redis KV/list/hash commands are rejected; only FT commands and basic control (SELECT, CLIENT, INFO, etc.) are allowed.
- FT JSONRPC is namespaced as "herodb" and methods are named with underscore: herodb_ftCreate, herodb_ftAdd, herodb_ftSearch, herodb_ftDel, herodb_ftInfo, herodb_ftDrop.
Reference to server implementation:
- RPC methods are defined in [rust.trait Rpc()](src/rpc.rs:70):
- [rust.fn ft_create()](src/rpc.rs:121)
- [rust.fn ft_add()](src/rpc.rs:130)
- [rust.fn ft_search()](src/rpc.rs:141)
- [rust.fn ft_del()](src/rpc.rs:154)
- [rust.fn ft_info()](src/rpc.rs:158)
- [rust.fn ft_drop()](src/rpc.rs:162)
Notes on responses:
- ftCreate/ftAdd/ftDel/ftDrop return a JSON boolean: true on success.
- ftSearch/ftInfo return a JSON object with a single key "resp" containing a RESPencoded string (wire format used by Redis). You can display or parse it on the client side as needed.
RESP usage (redis-cli):
- For RESP clients, you must SELECT the Tantivy database first. SELECT now succeeds for Tantivy DBs without opening KV storage.
- After SELECT, you can run FT.* commands within that DB context.
Example with redis-cli:
```bash
# Connect to server
redis-cli -p 6379
# Select Tantivy DB 1 (public by default)
SELECT 1
# → OK
# Create index
FT.CREATE product_catalog SCHEMA title TEXT description TEXT category TAG price NUMERIC rating NUMERIC location GEO
# → OK
# Add a document
FT.ADD product_catalog product:1 1.0 title "Wireless Bluetooth Headphones" description "Premium noise-canceling headphones with 30-hour battery life" category "electronics,audio" price 299.99 rating 4.5 location "-122.4194,37.7749"
# → OK
# Search
FT.SEARCH product_catalog wireless LIMIT 0 3
# → RESP array with hits
```
Storage layout (on disk):
- Indices are stored per database under:
- <base_dir>/search_indexes/<db_id>/<index_name>
- Example: /tmp/test/search_indexes/1/product_catalog
0) Create a new Tantivy database
Use herodb_createDatabase with backend "Tantivy". DB 0 cannot be Tantivy.
```json
{
"jsonrpc": "2.0",
"id": 1,
"method": "herodb_createDatabase",
"params": [
"Tantivy",
{ "name": "search-db", "storage_path": null, "max_size": null, "redis_version": null },
null
]
}
```
The response contains the allocated db_id (>= 1). Use that id in the calls below.
1) FT.CREATE — create an index with schema
Method: herodb_ftCreate → [rust.fn ft_create()](src/rpc.rs:121)
Schema format is an array of tuples: [ [field_name, field_type, [options...] ], ... ]
Supported field types: "TEXT", "NUMERIC" (defaults to F64), "TAG", "GEO"
Supported options (subset): "WEIGHT", "SORTABLE", "NOINDEX", "SEPARATOR", "CASESENSITIVE"
```json
{
"jsonrpc": "2.0",
"id": 2,
"method": "herodb_ftCreate",
"params": [
1,
"product_catalog",
[
["title", "TEXT", ["SORTABLE"]],
["description", "TEXT", []],
["category", "TAG", ["SEPARATOR", ","]],
["price", "NUMERIC", ["SORTABLE"]],
["rating", "NUMERIC", []],
["location", "GEO", []]
]
]
}
```
Returns: true on success.
2) FT.ADD — add or replace a document
Method: herodb_ftAdd → [rust.fn ft_add()](src/rpc.rs:130)
Fields is an object (map) of field_name → value (all values are sent as strings). GEO expects "lat,lon".
```json
{
"jsonrpc": "2.0",
"id": 3,
"method": "herodb_ftAdd",
"params": [
1,
"product_catalog",
"product:1",
1.0,
{
"title": "Wireless Bluetooth Headphones",
"description": "Premium noise-canceling headphones with 30-hour battery life",
"category": "electronics,audio",
"price": "299.99",
"rating": "4.5",
"location": "-122.4194,37.7749"
}
]
}
```
Returns: true on success.
3) FT.SEARCH — query an index
Method: herodb_ftSearch → [rust.fn ft_search()](src/rpc.rs:141)
Parameters: (db_id, index_name, query, filters?, limit?, offset?, return_fields?)
- filters: array of [field, value] pairs (Equals filter)
- limit/offset: numbers (defaults: limit=10, offset=0)
- return_fields: array of field names to include (optional)
Simple query:
```json
{
"jsonrpc": "2.0",
"id": 4,
"method": "herodb_ftSearch",
"params": [1, "product_catalog", "wireless", null, 10, 0, null]
}
```
Pagination + filters + selected fields:
```json
{
"jsonrpc": "2.0",
"id": 5,
"method": "herodb_ftSearch",
"params": [
1,
"product_catalog",
"mouse",
[["category", "electronics"]],
5,
0,
["title", "price", "rating"]
]
}
```
Response shape:
```json
{
"jsonrpc": "2.0",
"id": 5,
"result": { "resp": "*...RESP encoded array..." }
}
```
4) FT.INFO — index metadata
Method: herodb_ftInfo → [rust.fn ft_info()](src/rpc.rs:158)
```json
{
"jsonrpc": "2.0",
"id": 6,
"method": "herodb_ftInfo",
"params": [1, "product_catalog"]
}
```
Response shape:
```json
{
"jsonrpc": "2.0",
"id": 6,
"result": { "resp": "*...RESP encoded array with fields and counts..." }
}
```
5) FT.DEL — delete by doc id
Method: herodb_ftDel → [rust.fn ft_del()](src/rpc.rs:154)
```json
{
"jsonrpc": "2.0",
"id": 7,
"method": "herodb_ftDel",
"params": [1, "product_catalog", "product:1"]
}
```
Returns: true on success. Note: current implementation logs and returns success; physical delete may be a noop until delete is finalized in the engine.
6) FT.DROP — drop an index
Method: herodb_ftDrop → [rust.fn ft_drop()](src/rpc.rs:162)
```json
{
"jsonrpc": "2.0",
"id": 8,
"method": "herodb_ftDrop",
"params": [1, "product_catalog"]
}
```
Returns: true on success.
Field types and options
- TEXT: stored/indexed/tokenized text. "SORTABLE" marks it fast (stored + fast path in our wrapper).
- NUMERIC: stored/indexed numeric; default precision F64. "SORTABLE" enables fast column.
- TAG: exact matching terms. Options: "SEPARATOR" (default ","), "CASESENSITIVE" (default false).
- GEO: "lat,lon" string; stored as two numeric fields internally.
Backend and permission gating
- FT methods are rejected on DB 0.
- FT methods require the database backend to be Tantivy; otherwise RPC returns an error.
- Writelike FT methods (create/add/del/drop) follow the same permission model as Redis writes on selected databases.
Troubleshooting
- "DB backend is not Tantivy": ensure the database was created with backend "Tantivy".
- "FT not allowed on DB 0": use a nonadmin database id (>= 1).
- Empty search results: confirm that the queried fields are tokenized/indexed (TEXT) and that documents were added successfully.
Related docs
- Commandlevel search overview: [docs/search.md](docs/search.md:1)
- RPC definitions: [src/rpc.rs](src/rpc.rs:1)

143
run.sh Executable file
View File

@@ -0,0 +1,143 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
# Test script for HeroDB - Redis-compatible database with redb backend
# This script starts the server and runs comprehensive tests
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Configuration
DB_DIR="/tmp/test_db"
PORT=6381
SERVER_PID=""
# Function to print colored output
print_status() {
echo -e "${BLUE}[INFO]${NC} $1"
}
print_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
print_warning() {
echo -e "${YELLOW}[WARNING]${NC} $1"
}
# Function to cleanup on exit
cleanup() {
if [ ! -z "$SERVER_PID" ]; then
print_status "Stopping HeroDB server (PID: $SERVER_PID)..."
kill $SERVER_PID 2>/dev/null || true
wait $SERVER_PID 2>/dev/null || true
fi
# Clean up test database
if [ -d "$DB_DIR" ]; then
print_status "Cleaning up test database directory..."
rm -rf "$DB_DIR"
fi
}
# Set trap to cleanup on script exit
trap cleanup EXIT
# Function to wait for server to start
wait_for_server() {
local max_attempts=30
local attempt=1
print_status "Waiting for server to start on port $PORT..."
while [ $attempt -le $max_attempts ]; do
if nc -z localhost $PORT 2>/dev/null; then
print_success "Server is ready!"
return 0
fi
echo -n "."
sleep 1
attempt=$((attempt + 1))
done
print_error "Server failed to start within $max_attempts seconds"
return 1
}
# Function to send Redis command and get response
redis_cmd() {
local cmd="$1"
local expected="$2"
print_status "Testing: $cmd"
local result=$(echo "$cmd" | redis-cli -p $PORT --raw 2>/dev/null || echo "ERROR")
if [ "$expected" != "" ] && [ "$result" != "$expected" ]; then
print_error "Expected: '$expected', Got: '$result'"
return 1
else
print_success "$cmd -> $result"
return 0
fi
}
# Main execution
main() {
print_status "Starting HeroDB"
# Build the project
print_status "Building HeroDB..."
if ! cargo build -p herodb --release; then
print_error "Failed to build HeroDB"
exit 1
fi
# Create test database directory
mkdir -p "$DB_DIR"
# Start the server
print_status "Starting HeroDB server..."
${SCRIPT_DIR}/target/release/herodb --dir "$DB_DIR" --port $PORT &
SERVER_PID=$!
# Wait for server to start
if ! wait_for_server; then
print_error "Failed to start server"
exit 1
fi
}
# Check dependencies
check_dependencies() {
if ! command -v cargo &> /dev/null; then
print_error "cargo is required but not installed"
exit 1
fi
if ! command -v nc &> /dev/null; then
print_warning "netcat (nc) not found - some tests may not work properly"
fi
if ! command -v redis-cli &> /dev/null; then
print_warning "redis-cli not found - using netcat fallback"
fi
}
# Run dependency check and main function
check_dependencies
main "$@"
tail -f /dev/null

View File

@@ -1,4 +1,7 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
echo "🧪 Running HeroDB Redis Compatibility Tests"
echo "=========================================="

View File

@@ -48,6 +48,9 @@ fn init_admin_storage(
let storage: Arc<dyn StorageBackend> = match backend {
options::BackendType::Redb => Arc::new(Storage::new(&db_file, true, Some(admin_secret))?),
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, true, Some(admin_secret))?),
options::BackendType::Tantivy => {
return Err(DBError("Admin DB 0 cannot use Tantivy backend".to_string()))
}
};
Ok(storage)
}
@@ -199,6 +202,9 @@ pub fn open_data_storage(
let storage: Arc<dyn StorageBackend> = match effective_backend {
options::BackendType::Redb => Arc::new(Storage::new(&db_file, should_encrypt, enc.as_deref())?),
options::BackendType::Sled => Arc::new(SledStorage::new(&db_file, should_encrypt, enc.as_deref())?),
options::BackendType::Tantivy => {
return Err(DBError("Tantivy backend has no KV storage; use FT.* commands only".to_string()))
}
};
// Publish to registry
@@ -291,6 +297,7 @@ pub fn set_database_backend(
let val = match db_backend {
options::BackendType::Redb => "Redb",
options::BackendType::Sled => "Sled",
options::BackendType::Tantivy => "Tantivy",
};
let _ = admin.hset(&mk, vec![("backend".to_string(), val.to_string())])?;
Ok(())
@@ -307,6 +314,7 @@ pub fn get_database_backend(
match admin.hget(&mk, "backend")? {
Some(s) if s == "Redb" => Ok(Some(options::BackendType::Redb)),
Some(s) if s == "Sled" => Ok(Some(options::BackendType::Sled)),
Some(s) if s == "Tantivy" => Ok(Some(options::BackendType::Tantivy)),
_ => Ok(None),
}
}

View File

@@ -91,6 +91,41 @@ pub enum Cmd {
SymKeygen,
SymEncrypt(String, String), // key_b64, message
SymDecrypt(String, String), // key_b64, ciphertext_b64
// Full-text search commands with schema support
FtCreate {
index_name: String,
schema: Vec<(String, String, Vec<String>)>, // (field_name, field_type, options)
},
FtAdd {
index_name: String,
doc_id: String,
score: f64,
fields: std::collections::HashMap<String, String>,
},
FtSearch {
index_name: String,
query: String,
filters: Vec<(String, String)>, // field, value pairs
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
},
FtDel(String, String), // index_name, doc_id
FtInfo(String), // index_name
FtDrop(String), // index_name
FtAlter {
index_name: String,
field_name: String,
field_type: String,
options: Vec<String>,
},
FtAggregate {
index_name: String,
query: String,
group_by: Vec<String>,
reducers: Vec<String>,
}
}
impl Cmd {
@@ -646,6 +681,140 @@ impl Cmd {
_ => return Err(DBError(format!("unsupported SYM subcommand {:?}", cmd))),
}
}
"ft.create" => {
if cmd.len() < 4 || cmd[2].to_uppercase() != "SCHEMA" {
return Err(DBError("ERR FT.CREATE requires: indexname SCHEMA field1 type1 [options] ...".to_string()));
}
let index_name = cmd[1].clone();
let mut schema = Vec::new();
let mut i = 3;
while i < cmd.len() {
if i + 1 >= cmd.len() {
return Err(DBError("ERR incomplete field definition".to_string()));
}
let field_name = cmd[i].clone();
let field_type = cmd[i + 1].to_uppercase();
let mut options = Vec::new();
i += 2;
// Parse field options until we hit another field name or end
while i < cmd.len()
&& ["WEIGHT","SORTABLE","NOINDEX","SEPARATOR","CASESENSITIVE"]
.contains(&cmd[i].to_uppercase().as_str())
{
options.push(cmd[i].to_uppercase());
i += 1;
// If this option takes a value, consume it too
if i > 0 && ["SEPARATOR","WEIGHT"].contains(&cmd[i - 1].to_uppercase().as_str()) && i < cmd.len() {
options.push(cmd[i].clone());
i += 1;
}
}
schema.push((field_name, field_type, options));
}
Cmd::FtCreate { index_name, schema }
}
"ft.add" => {
if cmd.len() < 5 {
return Err(DBError("ERR FT.ADD requires: index_name doc_id score field value ...".to_string()));
}
let index_name = cmd[1].clone();
let doc_id = cmd[2].clone();
let score = cmd[3].parse::<f64>().map_err(|_| DBError("ERR score must be a number".to_string()))?;
let mut fields = std::collections::HashMap::new();
let mut i = 4;
while i + 1 < cmd.len() {
fields.insert(cmd[i].clone(), cmd[i + 1].clone());
i += 2;
}
Cmd::FtAdd { index_name, doc_id, score, fields }
}
"ft.search" => {
if cmd.len() < 3 {
return Err(DBError("ERR FT.SEARCH requires: index_name query [options]".to_string()));
}
let index_name = cmd[1].clone();
let query = cmd[2].clone();
let mut filters = Vec::new();
let mut limit = None;
let mut offset = None;
let mut return_fields = None;
let mut i = 3;
while i < cmd.len() {
match cmd[i].to_uppercase().as_str() {
"FILTER" => {
if i + 2 >= cmd.len() {
return Err(DBError("ERR FILTER requires field and value".to_string()));
}
filters.push((cmd[i + 1].clone(), cmd[i + 2].clone()));
i += 3;
}
"LIMIT" => {
if i + 2 >= cmd.len() {
return Err(DBError("ERR LIMIT requires offset and num".to_string()));
}
offset = Some(cmd[i + 1].parse().unwrap_or(0));
limit = Some(cmd[i + 2].parse().unwrap_or(10));
i += 3;
}
"RETURN" => {
if i + 1 >= cmd.len() {
return Err(DBError("ERR RETURN requires field count".to_string()));
}
let count: usize = cmd[i + 1].parse().unwrap_or(0);
i += 2;
let mut fields = Vec::new();
for _ in 0..count {
if i < cmd.len() {
fields.push(cmd[i].clone());
i += 1;
}
}
return_fields = Some(fields);
}
_ => i += 1,
}
}
Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields }
}
"ft.del" => {
if cmd.len() != 3 {
return Err(DBError("ERR FT.DEL requires: index_name doc_id".to_string()));
}
Cmd::FtDel(cmd[1].clone(), cmd[2].clone())
}
"ft.info" => {
if cmd.len() != 2 {
return Err(DBError("ERR FT.INFO requires: index_name".to_string()));
}
Cmd::FtInfo(cmd[1].clone())
}
"ft.drop" => {
if cmd.len() != 2 {
return Err(DBError("ERR FT.DROP requires: index_name".to_string()));
}
Cmd::FtDrop(cmd[1].clone())
}
"ft.alter" => {
if cmd.len() < 5 {
return Err(DBError("ERR FT.ALTER requires: index_name field_name field_type [options]".to_string()));
}
let index_name = cmd[1].clone();
let field_name = cmd[2].clone();
let field_type = cmd[3].clone();
let options = if cmd.len() > 4 { cmd[4..].to_vec() } else { vec![] };
Cmd::FtAlter { index_name, field_name, field_type, options }
}
"ft.aggregate" => {
if cmd.len() < 3 {
return Err(DBError("ERR FT.AGGREGATE requires: index_name query [options]".to_string()));
}
let index_name = cmd[1].clone();
let query = cmd[2].clone();
// Minimal parse for now
let group_by = Vec::new();
let reducers = Vec::new();
Cmd::FtAggregate { index_name, query, group_by, reducers }
}
_ => Cmd::Unknow(cmd[0].clone()),
},
protocol,
@@ -671,6 +840,59 @@ impl Cmd {
return Ok(Protocol::SimpleString("QUEUED".to_string()));
}
// Backend gating for Tantivy-only DBs: allow only FT.* and basic control/info commands
// Determine per-selected-db backend via admin meta (not process default).
let is_tantivy_backend = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if is_tantivy_backend {
match &self {
Cmd::Select(..)
| Cmd::Quit
| Cmd::Client(..)
| Cmd::ClientSetName(..)
| Cmd::ClientGetName
| Cmd::Command(..)
| Cmd::Info(..)
| Cmd::FtCreate { .. }
| Cmd::FtAdd { .. }
| Cmd::FtSearch { .. }
| Cmd::FtDel(..)
| Cmd::FtInfo(..)
| Cmd::FtDrop(..)
| Cmd::FtAlter { .. }
| Cmd::FtAggregate { .. } => {}
_ => {
return Ok(Protocol::err("ERR backend is Tantivy; only FT.* commands are allowed"));
}
}
}
// If selected DB is not Tantivy, forbid all FT.* commands here.
if !is_tantivy_backend {
match &self {
Cmd::FtCreate { .. }
| Cmd::FtAdd { .. }
| Cmd::FtSearch { .. }
| Cmd::FtDel(..)
| Cmd::FtInfo(..)
| Cmd::FtDrop(..)
| Cmd::FtAlter { .. }
| Cmd::FtAggregate { .. } => {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
_ => {}
}
}
match self {
Cmd::Select(db, key) => select_cmd(server, db, key).await,
Cmd::Ping => Ok(Protocol::SimpleString("PONG".to_string())),
@@ -767,6 +989,32 @@ impl Cmd {
Cmd::SymEncrypt(key_b64, message) => Ok(crate::sym::cmd_sym_encrypt(&key_b64, &message).await),
Cmd::SymDecrypt(key_b64, ct_b64) => Ok(crate::sym::cmd_sym_decrypt(&key_b64, &ct_b64).await),
// Full-text search commands
Cmd::FtCreate { index_name, schema } => {
crate::search_cmd::ft_create_cmd(server, index_name, schema).await
}
Cmd::FtAdd { index_name, doc_id, score, fields } => {
crate::search_cmd::ft_add_cmd(server, index_name, doc_id, score, fields).await
}
Cmd::FtSearch { index_name, query, filters, limit, offset, return_fields } => {
crate::search_cmd::ft_search_cmd(server, index_name, query, filters, limit, offset, return_fields).await
}
Cmd::FtDel(index_name, doc_id) => {
crate::search_cmd::ft_del_cmd(server, index_name, doc_id).await
}
Cmd::FtInfo(index_name) => {
crate::search_cmd::ft_info_cmd(server, index_name).await
}
Cmd::FtDrop(index_name) => {
crate::search_cmd::ft_drop_cmd(server, index_name).await
}
Cmd::FtAlter { .. } => {
Ok(Protocol::err("FT.ALTER not implemented yet"))
}
Cmd::FtAggregate { .. } => {
Ok(Protocol::err("FT.AGGREGATE not implemented yet"))
}
Cmd::Unknow(s) => Ok(Protocol::err(&format!("ERR unknown command `{}`", s))),
}
}
@@ -852,13 +1100,28 @@ async fn select_cmd(server: &mut Server, db: u64, key: Option<String>) -> Result
None => return Ok(Protocol::err("ERR invalid access key")),
};
// Set selected database and permissions, then open storage
// Set selected database and permissions, then open storage (skip for Tantivy backend)
server.selected_db = db;
server.current_permissions = Some(perms);
match server.current_storage() {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&e.0)),
// Resolve effective backend for this db_id from admin meta
let eff_backend = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
db,
)
.ok()
.flatten();
if matches!(eff_backend, Some(crate::options::BackendType::Tantivy)) {
// Tantivy DBs have no KV storage; allow SELECT to succeed
Ok(Protocol::SimpleString("OK".to_string()))
} else {
match server.current_storage() {
Ok(_) => Ok(Protocol::SimpleString("OK".to_string())),
Err(e) => Ok(Protocol::err(&e.0)),
}
}
}
@@ -1196,7 +1459,27 @@ async fn dbsize_cmd(server: &Server) -> Result<Protocol, DBError> {
}
async fn info_cmd(server: &Server, section: &Option<String>) -> Result<Protocol, DBError> {
let storage_info = server.current_storage()?.info()?;
// For Tantivy backend, there is no KV storage; synthesize minimal info.
// Determine effective backend for the currently selected db.
let is_tantivy_db = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
let storage_info: Vec<(String, String)> = if is_tantivy_db {
vec![
("db_size".to_string(), "0".to_string()),
("is_encrypted".to_string(), "false".to_string()),
]
} else {
server.current_storage()?.info()?
};
let mut info_map: std::collections::HashMap<String, String> = storage_info.into_iter().collect();
info_map.insert("redis_version".to_string(), "7.0.0".to_string());

View File

@@ -12,3 +12,5 @@ pub mod storage;
pub mod storage_trait;
pub mod storage_sled;
pub mod admin_meta;
pub mod tantivy_search;
pub mod search_cmd;

View File

@@ -2,6 +2,7 @@
pub enum BackendType {
Redb,
Sled,
Tantivy, // Full-text search backend (no KV storage)
}
#[derive(Debug, Clone)]

View File

@@ -14,6 +14,7 @@ use crate::admin_meta;
pub enum BackendType {
Redb,
Sled,
Tantivy, // Full-text search backend (no KV storage)
// Future: InMemory, Custom(String)
}
@@ -112,6 +113,53 @@ pub trait Rpc {
/// Set database public/private status
#[method(name = "setDatabasePublic")]
async fn set_database_public(&self, db_id: u64, public: bool) -> RpcResult<bool>;
// ----- Full-text (Tantivy) minimal RPC endpoints -----
/// Create a new FT index in a Tantivy-backed DB
#[method(name = "ftCreate")]
async fn ft_create(
&self,
db_id: u64,
index_name: String,
schema: Vec<(String, String, Vec<String>)>,
) -> RpcResult<bool>;
/// Add or replace a document in an FT index
#[method(name = "ftAdd")]
async fn ft_add(
&self,
db_id: u64,
index_name: String,
doc_id: String,
score: f64,
fields: HashMap<String, String>,
) -> RpcResult<bool>;
/// Search an FT index
#[method(name = "ftSearch")]
async fn ft_search(
&self,
db_id: u64,
index_name: String,
query: String,
filters: Option<Vec<(String, String)>>,
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
) -> RpcResult<serde_json::Value>;
/// Delete a document by id from an FT index
#[method(name = "ftDel")]
async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool>;
/// Get FT index info
#[method(name = "ftInfo")]
async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value>;
/// Drop an FT index
#[method(name = "ftDrop")]
async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool>;
}
/// RPC Server implementation
@@ -187,13 +235,14 @@ impl RpcServerImpl {
}
// Create server instance with resolved backend
let is_tantivy = matches!(effective_backend, crate::options::BackendType::Tantivy);
let db_option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
debug: false,
encryption_key: None,
encrypt: false,
backend: effective_backend,
backend: effective_backend.clone(),
admin_secret: self.admin_secret.clone(),
};
@@ -203,7 +252,10 @@ impl RpcServerImpl {
server.selected_db = db_id;
// Lazily open/create physical storage according to admin meta (per-db encryption)
let _ = server.current_storage();
// Skip for Tantivy backend (no KV storage to open)
if !is_tantivy {
let _ = server.current_storage();
}
// Store the server
let mut servers = self.servers.write().await;
@@ -290,6 +342,7 @@ impl RpcServerImpl {
let backend = match server.option.backend {
crate::options::BackendType::Redb => BackendType::Redb,
crate::options::BackendType::Sled => BackendType::Sled,
crate::options::BackendType::Tantivy => BackendType::Tantivy,
};
DatabaseInfo {
@@ -340,18 +393,20 @@ impl RpcServer for RpcServerImpl {
let opt_backend = match backend {
BackendType::Redb => crate::options::BackendType::Redb,
BackendType::Sled => crate::options::BackendType::Sled,
BackendType::Tantivy => crate::options::BackendType::Tantivy,
};
admin_meta::set_database_backend(&self.base_dir, self.backend.clone(), &self.admin_secret, db_id, opt_backend.clone())
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
// Create server instance using base_dir, chosen backend and admin secret
let is_tantivy_new = matches!(opt_backend, crate::options::BackendType::Tantivy);
let option = DBOption {
dir: self.base_dir.clone(),
port: 0, // Not used for RPC-managed databases
debug: false,
encryption_key: None, // per-db key is stored in admin DB 0
encrypt: false, // encryption decided per-db at open time
backend: opt_backend,
backend: opt_backend.clone(),
admin_secret: self.admin_secret.clone(),
};
@@ -359,7 +414,10 @@ impl RpcServer for RpcServerImpl {
server.selected_db = db_id;
// Initialize storage to create physical <id>.db with proper encryption from admin meta
let _ = server.current_storage();
// Skip for Tantivy backend (no KV storage to initialize)
if !is_tantivy_new {
let _ = server.current_storage();
}
// Store the server in cache
let mut servers = self.servers.write().await;
@@ -420,6 +478,7 @@ impl RpcServer for RpcServerImpl {
let db_ids = self.discover_databases().await;
let mut stats = HashMap::new();
stats.insert("total_databases".to_string(), serde_json::json!(db_ids.len()));
stats.insert("uptime".to_string(), serde_json::json!(
std::time::SystemTime::now()
@@ -431,6 +490,121 @@ impl RpcServer for RpcServerImpl {
Ok(stats)
}
// ----- Full-text (Tantivy) minimal RPC endpoints -----
async fn ft_create(
&self,
db_id: u64,
index_name: String,
schema: Vec<(String, String, Vec<String>)>,
) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_create_cmd(&*server, index_name, schema)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn ft_add(
&self,
db_id: u64,
index_name: String,
doc_id: String,
score: f64,
fields: HashMap<String, String>,
) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_add_cmd(&*server, index_name, doc_id, score, fields)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn ft_search(
&self,
db_id: u64,
index_name: String,
query: String,
filters: Option<Vec<(String, String)>>,
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
) -> RpcResult<serde_json::Value> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
let proto = crate::search_cmd::ft_search_cmd(
&*server,
index_name,
query,
filters.unwrap_or_default(),
limit,
offset,
return_fields,
)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(serde_json::json!({ "resp": proto.encode() }))
}
async fn ft_del(&self, db_id: u64, index_name: String, doc_id: String) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_del_cmd(&*server, index_name, doc_id)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn ft_info(&self, db_id: u64, index_name: String) -> RpcResult<serde_json::Value> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
let proto = crate::search_cmd::ft_info_cmd(&*server, index_name)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(serde_json::json!({ "resp": proto.encode() }))
}
async fn ft_drop(&self, db_id: u64, index_name: String) -> RpcResult<bool> {
let server = self.get_or_create_server(db_id).await?;
if db_id == 0 {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "FT not allowed on DB 0", None::<()>));
}
if !matches!(server.option.backend, crate::options::BackendType::Tantivy) {
return Err(jsonrpsee::types::ErrorObjectOwned::owned(-32000, "DB backend is not Tantivy", None::<()>));
}
crate::search_cmd::ft_drop_cmd(&*server, index_name)
.await
.map_err(|e| jsonrpsee::types::ErrorObjectOwned::owned(-32000, e.0, None::<()>))?;
Ok(true)
}
async fn add_access_key(&self, db_id: u64, key: String, permissions: String) -> RpcResult<bool> {
let perms = match permissions.to_lowercase().as_str() {
"read" => Permissions::Read,

352
src/search_cmd.rs Normal file
View File

@@ -0,0 +1,352 @@
use crate::{
error::DBError,
protocol::Protocol,
server::Server,
tantivy_search::{
FieldDef, Filter, FilterType, IndexConfig, NumericType, SearchOptions, TantivySearch,
},
};
use std::collections::HashMap;
use std::sync::Arc;
pub async fn ft_create_cmd(
server: &Server,
index_name: String,
schema: Vec<(String, String, Vec<String>)>,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
// Parse schema into field definitions
let mut field_definitions = Vec::new();
for (field_name, field_type, options) in schema {
let field_def = match field_type.to_uppercase().as_str() {
"TEXT" => {
let mut sortable = false;
let mut no_index = false;
// Weight is not used in current implementation
let mut _weight = 1.0f32;
let mut i = 0;
while i < options.len() {
match options[i].to_uppercase().as_str() {
"WEIGHT" => {
if i + 1 < options.len() {
_weight = options[i + 1].parse::<f32>().unwrap_or(1.0);
i += 2;
continue;
}
}
"SORTABLE" => {
sortable = true;
}
"NOINDEX" => {
no_index = true;
}
_ => {}
}
i += 1;
}
FieldDef::Text {
stored: true,
indexed: !no_index,
tokenized: true,
fast: sortable,
}
}
"NUMERIC" => {
// default to F64
let mut sortable = false;
for opt in &options {
if opt.to_uppercase() == "SORTABLE" {
sortable = true;
}
}
FieldDef::Numeric {
stored: true,
indexed: true,
fast: sortable,
precision: NumericType::F64,
}
}
"TAG" => {
let mut separator = ",".to_string();
let mut case_sensitive = false;
let mut i = 0;
while i < options.len() {
match options[i].to_uppercase().as_str() {
"SEPARATOR" => {
if i + 1 < options.len() {
separator = options[i + 1].clone();
i += 2;
continue;
}
}
"CASESENSITIVE" => {
case_sensitive = true;
}
_ => {}
}
i += 1;
}
FieldDef::Tag {
stored: true,
separator,
case_sensitive,
}
}
"GEO" => FieldDef::Geo { stored: true },
_ => {
return Err(DBError(format!("Unknown field type: {}", field_type)));
}
};
field_definitions.push((field_name, field_def));
}
// Create the search index
let search_path = server.search_index_path();
let config = IndexConfig::default();
let search_index = TantivySearch::new_with_schema(
search_path,
index_name.clone(),
field_definitions,
Some(config),
)?;
// Store in registry
let mut indexes = server.search_indexes.write().unwrap();
indexes.insert(index_name, Arc::new(search_index));
Ok(Protocol::SimpleString("OK".to_string()))
}
pub async fn ft_add_cmd(
server: &Server,
index_name: String,
doc_id: String,
_score: f64,
fields: HashMap<String, String>,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
search_index.add_document_with_fields(&doc_id, fields)?;
Ok(Protocol::SimpleString("OK".to_string()))
}
pub async fn ft_search_cmd(
server: &Server,
index_name: String,
query: String,
filters: Vec<(String, String)>,
limit: Option<usize>,
offset: Option<usize>,
return_fields: Option<Vec<String>>,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
let search_filters = filters
.into_iter()
.map(|(field, value)| Filter {
field,
filter_type: FilterType::Equals(value),
})
.collect();
let options = SearchOptions {
limit: limit.unwrap_or(10),
offset: offset.unwrap_or(0),
filters: search_filters,
sort_by: None,
return_fields,
highlight: false,
};
let results = search_index.search_with_options(&query, options)?;
// Format results as Redis protocol
let mut response = Vec::new();
// First element is the total count
response.push(Protocol::SimpleString(results.total.to_string()));
// Then each document
for doc in results.documents {
let mut doc_array = Vec::new();
// Add document ID if it exists
if let Some(id) = doc.fields.get("_id") {
doc_array.push(Protocol::BulkString(id.clone()));
}
// Add score
doc_array.push(Protocol::BulkString(doc.score.to_string()));
// Add fields as key-value pairs
for (field_name, field_value) in doc.fields {
if field_name != "_id" {
doc_array.push(Protocol::BulkString(field_name));
doc_array.push(Protocol::BulkString(field_value));
}
}
response.push(Protocol::Array(doc_array));
}
Ok(Protocol::Array(response))
}
pub async fn ft_del_cmd(
server: &Server,
index_name: String,
doc_id: String,
) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let _search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
// Not fully implemented yet: Tantivy delete by term would require a writer session and commit coordination.
println!("Deleting document '{}' from index '{}'", doc_id, index_name);
Ok(Protocol::SimpleString("1".to_string()))
}
pub async fn ft_info_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
let indexes = server.search_indexes.read().unwrap();
let search_index = indexes
.get(&index_name)
.ok_or_else(|| DBError(format!("Index '{}' not found", index_name)))?;
let info = search_index.get_info()?;
// Format info as Redis protocol
let mut response = Vec::new();
response.push(Protocol::BulkString("index_name".to_string()));
response.push(Protocol::BulkString(info.name));
response.push(Protocol::BulkString("num_docs".to_string()));
response.push(Protocol::BulkString(info.num_docs.to_string()));
response.push(Protocol::BulkString("num_fields".to_string()));
response.push(Protocol::BulkString(info.fields.len().to_string()));
response.push(Protocol::BulkString("fields".to_string()));
let fields_str = info
.fields
.iter()
.map(|f| format!("{}:{}", f.name, f.field_type))
.collect::<Vec<_>>()
.join(", ");
response.push(Protocol::BulkString(fields_str));
Ok(Protocol::Array(response))
}
pub async fn ft_drop_cmd(server: &Server, index_name: String) -> Result<Protocol, DBError> {
if server.selected_db == 0 {
return Ok(Protocol::err("FT commands are not allowed on DB 0"));
}
// Enforce Tantivy backend for selected DB
let is_tantivy = crate::admin_meta::get_database_backend(
&server.option.dir,
server.option.backend.clone(),
&server.option.admin_secret,
server.selected_db,
)
.ok()
.flatten()
.map(|b| matches!(b, crate::options::BackendType::Tantivy))
.unwrap_or(false);
if !is_tantivy {
return Ok(Protocol::err("ERR DB backend is not Tantivy; FT.* commands are not allowed"));
}
// Remove from registry
{
let mut indexes = server.search_indexes.write().unwrap();
indexes.remove(&index_name);
}
// Remove the index files from disk
let index_path = server.search_index_path().join(&index_name);
if index_path.exists() {
std::fs::remove_dir_all(&index_path)
.map_err(|e| DBError(format!("Failed to remove index files: {}", e)))?;
}
Ok(Protocol::SimpleString("OK".to_string()))
}

View File

@@ -23,6 +23,9 @@ pub struct Server {
pub queued_cmd: Option<Vec<(Cmd, Protocol)>>,
pub current_permissions: Option<crate::rpc::Permissions>,
// In-memory registry of Tantivy search indexes for this server
pub search_indexes: Arc<std::sync::RwLock<HashMap<String, Arc<crate::tantivy_search::TantivySearch>>>>,
// BLPOP waiter registry: per (db_index, key) FIFO of waiters
pub list_waiters: Arc<Mutex<HashMap<u64, HashMap<String, Vec<Waiter>>>>>,
pub waiter_seq: Arc<AtomicU64>,
@@ -49,12 +52,25 @@ impl Server {
selected_db: 0,
queued_cmd: None,
current_permissions: None,
search_indexes: Arc::new(std::sync::RwLock::new(HashMap::new())),
list_waiters: Arc::new(Mutex::new(HashMap::new())),
waiter_seq: Arc::new(AtomicU64::new(1)),
}
}
// Path where search indexes are stored, namespaced per selected DB:
// <base_dir>/search_indexes/<db_id>
pub fn search_index_path(&self) -> std::path::PathBuf {
let base = std::path::PathBuf::from(&self.option.dir)
.join("search_indexes")
.join(self.selected_db.to_string());
if !base.exists() {
let _ = std::fs::create_dir_all(&base);
}
base
}
pub fn current_storage(&self) -> Result<Arc<dyn StorageBackend>, DBError> {
let mut cache = self.db_cache.write().unwrap();

667
src/tantivy_search.rs Normal file
View File

@@ -0,0 +1,667 @@
use crate::error::DBError;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use tantivy::{
collector::TopDocs,
directory::MmapDirectory,
query::{BooleanQuery, Occur, Query, QueryParser, TermQuery},
schema::{
DateOptions, Field, IndexRecordOption, NumericOptions, Schema, TextFieldIndexing, TextOptions, STORED, STRING,
},
tokenizer::TokenizerManager,
DateTime, Index, IndexReader, IndexWriter, TantivyDocument, Term,
};
use tantivy::schema::Value;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FieldDef {
Text {
stored: bool,
indexed: bool,
tokenized: bool,
fast: bool,
},
Numeric {
stored: bool,
indexed: bool,
fast: bool,
precision: NumericType,
},
Tag {
stored: bool,
separator: String,
case_sensitive: bool,
},
Geo {
stored: bool,
},
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NumericType {
I64,
U64,
F64,
Date,
}
pub struct IndexSchema {
schema: Schema,
fields: HashMap<String, (Field, FieldDef)>,
default_search_fields: Vec<Field>,
}
pub struct TantivySearch {
index: Index,
writer: Arc<RwLock<IndexWriter>>,
reader: IndexReader,
index_schema: IndexSchema,
name: String,
config: IndexConfig,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexConfig {
pub language: String,
pub stopwords: Vec<String>,
pub stemming: bool,
pub max_doc_count: Option<usize>,
pub default_score: f64,
}
impl Default for IndexConfig {
fn default() -> Self {
IndexConfig {
language: "english".to_string(),
stopwords: vec![],
stemming: true,
max_doc_count: None,
default_score: 1.0,
}
}
}
impl TantivySearch {
pub fn new_with_schema(
base_path: PathBuf,
name: String,
field_definitions: Vec<(String, FieldDef)>,
config: Option<IndexConfig>,
) -> Result<Self, DBError> {
let index_path = base_path.join(&name);
std::fs::create_dir_all(&index_path)
.map_err(|e| DBError(format!("Failed to create index dir: {}", e)))?;
// Build schema from field definitions
let mut schema_builder = Schema::builder();
let mut fields = HashMap::new();
let mut default_search_fields = Vec::new();
// Always add a document ID field
let id_field = schema_builder.add_text_field("_id", STRING | STORED);
fields.insert(
"_id".to_string(),
(
id_field,
FieldDef::Text {
stored: true,
indexed: true,
tokenized: false,
fast: false,
},
),
);
// Add user-defined fields
for (field_name, field_def) in field_definitions {
let field = match &field_def {
FieldDef::Text {
stored,
indexed,
tokenized,
fast: _fast,
} => {
let mut text_options = TextOptions::default();
if *stored {
text_options = text_options.set_stored();
}
if *indexed {
let indexing_options = if *tokenized {
TextFieldIndexing::default()
.set_tokenizer("default")
.set_index_option(IndexRecordOption::WithFreqsAndPositions)
} else {
TextFieldIndexing::default()
.set_tokenizer("raw")
.set_index_option(IndexRecordOption::Basic)
};
text_options = text_options.set_indexing_options(indexing_options);
let f = schema_builder.add_text_field(&field_name, text_options);
if *tokenized {
default_search_fields.push(f);
}
f
} else {
schema_builder.add_text_field(&field_name, text_options)
}
}
FieldDef::Numeric {
stored,
indexed,
fast,
precision,
} => match precision {
NumericType::I64 => {
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_i64_field(&field_name, opts)
}
NumericType::U64 => {
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_u64_field(&field_name, opts)
}
NumericType::F64 => {
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_f64_field(&field_name, opts)
}
NumericType::Date => {
let mut opts = DateOptions::default();
if *stored {
opts = opts.set_stored();
}
if *indexed {
opts = opts.set_indexed();
}
if *fast {
opts = opts.set_fast();
}
schema_builder.add_date_field(&field_name, opts)
}
},
FieldDef::Tag {
stored,
separator: _,
case_sensitive: _,
} => {
let mut text_options = TextOptions::default();
if *stored {
text_options = text_options.set_stored();
}
text_options = text_options.set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("raw")
.set_index_option(IndexRecordOption::Basic),
);
schema_builder.add_text_field(&field_name, text_options)
}
FieldDef::Geo { stored } => {
// For now, store as two f64 fields for lat/lon
let mut opts = NumericOptions::default();
if *stored {
opts = opts.set_stored();
}
opts = opts.set_indexed().set_fast();
let lat_field =
schema_builder.add_f64_field(&format!("{}_lat", field_name), opts.clone());
let lon_field =
schema_builder.add_f64_field(&format!("{}_lon", field_name), opts);
fields.insert(
format!("{}_lat", field_name),
(
lat_field,
FieldDef::Numeric {
stored: *stored,
indexed: true,
fast: true,
precision: NumericType::F64,
},
),
);
fields.insert(
format!("{}_lon", field_name),
(
lon_field,
FieldDef::Numeric {
stored: *stored,
indexed: true,
fast: true,
precision: NumericType::F64,
},
),
);
continue; // Skip adding the geo field itself
}
};
fields.insert(field_name.clone(), (field, field_def));
}
let schema = schema_builder.build();
let index_schema = IndexSchema {
schema: schema.clone(),
fields,
default_search_fields,
};
// Create or open index
let dir = MmapDirectory::open(&index_path)
.map_err(|e| DBError(format!("Failed to open index directory: {}", e)))?;
let mut index =
Index::open_or_create(dir, schema).map_err(|e| DBError(format!("Failed to create index: {}", e)))?;
// Configure tokenizers
let tokenizer_manager = TokenizerManager::default();
index.set_tokenizers(tokenizer_manager);
let writer = index
.writer(15_000_000)
.map_err(|e| DBError(format!("Failed to create index writer: {}", e)))?;
let reader = index
.reader()
.map_err(|e| DBError(format!("Failed to create reader: {}", e)))?;
let config = config.unwrap_or_default();
Ok(TantivySearch {
index,
writer: Arc::new(RwLock::new(writer)),
reader,
index_schema,
name,
config,
})
}
pub fn add_document_with_fields(
&self,
doc_id: &str,
fields: HashMap<String, String>,
) -> Result<(), DBError> {
let mut writer = self
.writer
.write()
.map_err(|e| DBError(format!("Failed to acquire writer lock: {}", e)))?;
// Delete existing document with same ID
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
writer.delete_term(Term::from_field_text(*id_field, doc_id));
}
// Create new document
let mut doc = tantivy::doc!();
// Add document ID
if let Some((id_field, _)) = self.index_schema.fields.get("_id") {
doc.add_text(*id_field, doc_id);
}
// Add other fields based on schema
for (field_name, field_value) in fields {
if let Some((field, field_def)) = self.index_schema.fields.get(&field_name) {
match field_def {
FieldDef::Text { .. } => {
doc.add_text(*field, &field_value);
}
FieldDef::Numeric { precision, .. } => match precision {
NumericType::I64 => {
if let Ok(v) = field_value.parse::<i64>() {
doc.add_i64(*field, v);
}
}
NumericType::U64 => {
if let Ok(v) = field_value.parse::<u64>() {
doc.add_u64(*field, v);
}
}
NumericType::F64 => {
if let Ok(v) = field_value.parse::<f64>() {
doc.add_f64(*field, v);
}
}
NumericType::Date => {
if let Ok(v) = field_value.parse::<i64>() {
doc.add_date(*field, DateTime::from_timestamp_millis(v));
}
}
},
FieldDef::Tag {
separator,
case_sensitive,
..
} => {
let tags = if !case_sensitive {
field_value.to_lowercase()
} else {
field_value.clone()
};
for tag in tags.split(separator.as_str()) {
doc.add_text(*field, tag.trim());
}
}
FieldDef::Geo { .. } => {
let parts: Vec<&str> = field_value.split(',').collect();
if parts.len() == 2 {
if let (Ok(lat), Ok(lon)) =
(parts[0].parse::<f64>(), parts[1].parse::<f64>())
{
if let Some((lat_field, _)) =
self.index_schema.fields.get(&format!("{}_lat", field_name))
{
doc.add_f64(*lat_field, lat);
}
if let Some((lon_field, _)) =
self.index_schema.fields.get(&format!("{}_lon", field_name))
{
doc.add_f64(*lon_field, lon);
}
}
}
}
}
}
}
writer
.add_document(doc)
.map_err(|e| DBError(format!("Failed to add document: {}", e)))?;
writer
.commit()
.map_err(|e| DBError(format!("Failed to commit: {}", e)))?;
Ok(())
}
pub fn search_with_options(
&self,
query_str: &str,
options: SearchOptions,
) -> Result<SearchResults, DBError> {
let searcher = self.reader.searcher();
// Ensure we have searchable fields
if self.index_schema.default_search_fields.is_empty() {
return Err(DBError("No searchable fields defined in schema".to_string()));
}
// Parse query based on search fields
let query_parser = QueryParser::for_index(
&self.index,
self.index_schema.default_search_fields.clone(),
);
let parsed_query = query_parser
.parse_query(query_str)
.map_err(|e| DBError(format!("Failed to parse query: {}", e)))?;
let mut clauses: Vec<(Occur, Box<dyn Query>)> = vec![(Occur::Must, parsed_query)];
// Apply filters if any
for filter in options.filters {
if let Some((field, field_def)) = self.index_schema.fields.get(&filter.field) {
match filter.filter_type {
FilterType::Equals(value) => {
match field_def {
FieldDef::Text { .. } | FieldDef::Tag { .. } => {
let term_query =
TermQuery::new(Term::from_field_text(*field, &value), IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(term_query)));
}
FieldDef::Numeric { precision, .. } => {
// Equals on numeric fields: parse to the right numeric type and use term query
match precision {
NumericType::I64 => {
if let Ok(v) = value.parse::<i64>() {
let term = Term::from_field_i64(*field, v);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
NumericType::U64 => {
if let Ok(v) = value.parse::<u64>() {
let term = Term::from_field_u64(*field, v);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
NumericType::F64 => {
if let Ok(v) = value.parse::<f64>() {
let term = Term::from_field_f64(*field, v);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
NumericType::Date => {
if let Ok(v) = value.parse::<i64>() {
let dt = DateTime::from_timestamp_millis(v);
let term = Term::from_field_date(*field, dt);
let tq = TermQuery::new(term, IndexRecordOption::Basic);
clauses.push((Occur::Must, Box::new(tq)));
}
}
}
}
FieldDef::Geo { .. } => {
// Geo equals isn't supported in this simplified version
}
}
}
FilterType::Range { .. } => {
// TODO: Implement numeric range queries by building a RangeQuery per type
}
FilterType::InSet(values) => {
// OR across values
let mut sub_clauses: Vec<(Occur, Box<dyn Query>)> = vec![];
for value in values {
let term_query = TermQuery::new(
Term::from_field_text(*field, &value),
IndexRecordOption::Basic,
);
sub_clauses.push((Occur::Should, Box::new(term_query)));
}
clauses.push((Occur::Must, Box::new(BooleanQuery::new(sub_clauses))));
}
}
}
}
let final_query: Box<dyn Query> = if clauses.len() == 1 {
clauses.pop().unwrap().1
} else {
Box::new(BooleanQuery::new(clauses))
};
// Execute search
let top_docs = searcher
.search(&*final_query, &TopDocs::with_limit(options.limit + options.offset))
.map_err(|e| DBError(format!("Search failed: {}", e)))?;
let total_hits = top_docs.len();
let mut documents = Vec::new();
for (score, doc_address) in top_docs.into_iter().skip(options.offset).take(options.limit) {
let retrieved_doc: TantivyDocument = searcher
.doc(doc_address)
.map_err(|e| DBError(format!("Failed to retrieve doc: {}", e)))?;
let mut doc_fields = HashMap::new();
// Extract stored fields (or synthesize)
for (field_name, (field, field_def)) in &self.index_schema.fields {
match field_def {
FieldDef::Text { stored, .. } | FieldDef::Tag { stored, .. } => {
if *stored {
if let Some(value) = retrieved_doc.get_first(*field) {
if let Some(text) = value.as_str() {
doc_fields.insert(field_name.clone(), text.to_string());
}
}
}
}
FieldDef::Numeric {
stored, precision, ..
} => {
if *stored {
let value_str = match precision {
NumericType::I64 => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_i64())
.map(|v| v.to_string()),
NumericType::U64 => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_u64())
.map(|v| v.to_string()),
NumericType::F64 => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_f64())
.map(|v| v.to_string()),
NumericType::Date => retrieved_doc
.get_first(*field)
.and_then(|v| v.as_datetime())
.map(|v| v.into_timestamp_millis().to_string()),
};
if let Some(v) = value_str {
doc_fields.insert(field_name.clone(), v);
}
}
}
FieldDef::Geo { stored } => {
if *stored {
let lat_field = self
.index_schema
.fields
.get(&format!("{}_lat", field_name))
.unwrap()
.0;
let lon_field = self
.index_schema
.fields
.get(&format!("{}_lon", field_name))
.unwrap()
.0;
let lat = retrieved_doc.get_first(lat_field).and_then(|v| v.as_f64());
let lon = retrieved_doc.get_first(lon_field).and_then(|v| v.as_f64());
if let (Some(lat), Some(lon)) = (lat, lon) {
doc_fields.insert(field_name.clone(), format!("{},{}", lat, lon));
}
}
}
}
}
documents.push(SearchDocument {
fields: doc_fields,
score,
});
}
Ok(SearchResults {
total: total_hits,
documents,
})
}
pub fn get_info(&self) -> Result<IndexInfo, DBError> {
let searcher = self.reader.searcher();
let num_docs = searcher.num_docs();
let fields_info: Vec<FieldInfo> = self
.index_schema
.fields
.iter()
.map(|(name, (_, def))| FieldInfo {
name: name.clone(),
field_type: format!("{:?}", def),
})
.collect();
Ok(IndexInfo {
name: self.name.clone(),
num_docs,
fields: fields_info,
config: self.config.clone(),
})
}
}
#[derive(Debug, Clone)]
pub struct SearchOptions {
pub limit: usize,
pub offset: usize,
pub filters: Vec<Filter>,
pub sort_by: Option<String>,
pub return_fields: Option<Vec<String>>,
pub highlight: bool,
}
impl Default for SearchOptions {
fn default() -> Self {
SearchOptions {
limit: 10,
offset: 0,
filters: vec![],
sort_by: None,
return_fields: None,
highlight: false,
}
}
}
#[derive(Debug, Clone)]
pub struct Filter {
pub field: String,
pub filter_type: FilterType,
}
#[derive(Debug, Clone)]
pub enum FilterType {
Equals(String),
Range { min: String, max: String },
InSet(Vec<String>),
}
#[derive(Debug)]
pub struct SearchResults {
pub total: usize,
pub documents: Vec<SearchDocument>,
}
#[derive(Debug)]
pub struct SearchDocument {
pub fields: HashMap<String, String>,
pub score: f32,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct IndexInfo {
pub name: String,
pub num_docs: u64,
pub fields: Vec<FieldInfo>,
pub config: IndexConfig,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct FieldInfo {
pub name: String,
pub field_type: String,
}

View File

@@ -1,10 +1,11 @@
#!/bin/bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
# Test script for HeroDB - Redis-compatible database with redb backend
# This script starts the server and runs comprehensive tests
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'