feat: adjust code indexing logic (#510)

wsxiaoys-patch-1
Meng Zhang 2023-10-05 13:29:41 +08:00 committed by GitHub
parent 1babc38902
commit 9cd2accbaa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 243 additions and 113 deletions

273
Cargo.lock generated
View File

@ -28,17 +28,6 @@ dependencies = [
"cpufeatures", "cpufeatures",
] ]
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]] [[package]]
name = "ahash" name = "ahash"
version = "0.8.3" version = "0.8.3"
@ -206,7 +195,7 @@ checksum = "f8175979259124331c1d7bf6586ee7e0da434155e4b2d48ec2c8386281d8df39"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"axum-core", "axum-core",
"bitflags", "bitflags 1.3.2",
"bytes", "bytes",
"futures-util", "futures-util",
"http", "http",
@ -322,6 +311,12 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
[[package]] [[package]]
name = "bitpacking" name = "bitpacking"
version = "0.8.4" version = "0.8.4"
@ -385,7 +380,7 @@ version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cead8ece0da6b744b2ad8ef9c58a4cdc7ef2921e60a6ddfb9eaaa86839b5fc5" checksum = "8cead8ece0da6b744b2ad8ef9c58a4cdc7ef2921e60a6ddfb9eaaa86839b5fc5"
dependencies = [ dependencies = [
"ahash 0.8.3", "ahash",
"async-trait", "async-trait",
"cached_proc_macro", "cached_proc_macro",
"cached_proc_macro_types", "cached_proc_macro_types",
@ -508,7 +503,7 @@ checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990"
dependencies = [ dependencies = [
"anstream", "anstream",
"anstyle", "anstyle",
"bitflags", "bitflags 1.3.2",
"clap_lex", "clap_lex",
"strsim 0.10.0", "strsim 0.10.0",
] ]
@ -556,15 +551,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "combine"
version = "4.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "console" name = "console"
version = "0.15.7" version = "0.15.7"
@ -970,37 +956,12 @@ dependencies = [
"cc", "cc",
] ]
[[package]]
name = "fail"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c"
dependencies = [
"log",
"once_cell",
"rand",
]
[[package]] [[package]]
name = "fastdivide" name = "fastdivide"
version = "0.4.0" version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04" checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04"
[[package]]
name = "fastfield_codecs"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "374a3a53c1bd5fb31b10084229290eafb0a05f260ec90f1f726afffda4877a8a"
dependencies = [
"fastdivide",
"itertools 0.10.5",
"log",
"ownedbytes",
"tantivy-bitpacker",
"tantivy-common",
]
[[package]] [[package]]
name = "fastrand" name = "fastrand"
version = "1.9.0" version = "1.9.0"
@ -1097,6 +1058,16 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "fs4"
version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eeb4ed9e12f43b7fa0baae3f9cdda28352770132ef2e09a23760c29cae8bd47"
dependencies = [
"rustix 0.38.17",
"windows-sys 0.48.0",
]
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.28" version = "0.3.28"
@ -1256,9 +1227,6 @@ name = "hashbrown"
version = "0.12.3" version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash 0.7.6",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
@ -1266,7 +1234,7 @@ version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
dependencies = [ dependencies = [
"ahash 0.8.3", "ahash",
"allocator-api2", "allocator-api2",
] ]
@ -1540,7 +1508,7 @@ checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f"
dependencies = [ dependencies = [
"hermit-abi 0.3.1", "hermit-abi 0.3.1",
"io-lifetimes", "io-lifetimes",
"rustix", "rustix 0.37.19",
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
@ -1571,6 +1539,15 @@ dependencies = [
"either", "either",
] ]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.6" version = "1.0.6"
@ -1620,9 +1597,9 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.144" version = "0.2.148"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
[[package]] [[package]]
name = "libloading" name = "libloading"
@ -1649,6 +1626,12 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
[[package]]
name = "linux-raw-sys"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3852614a3bd9ca9804678ba6be5e3b8ce76dfc902cae004e3e0c44051b6e88db"
[[package]] [[package]]
name = "llama-cpp-bindings" name = "llama-cpp-bindings"
version = "0.2.0" version = "0.2.0"
@ -1701,18 +1684,18 @@ dependencies = [
[[package]] [[package]]
name = "lru" name = "lru"
version = "0.7.8" version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a" checksum = "a4a83fb7698b3643a0e34f9ae6f2e8f0178c0fd42f8b59d493aa271ff3a5bf21"
dependencies = [ dependencies = [
"hashbrown 0.12.3", "hashbrown 0.14.0",
] ]
[[package]] [[package]]
name = "lz4_flex" name = "lz4_flex"
version = "0.9.5" version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a8cbbb2831780bc3b9c15a41f5b49222ef756b6730a95f3decfdd15903eb5a3" checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8"
[[package]] [[package]]
name = "macro_rules_attribute" name = "macro_rules_attribute"
@ -1772,9 +1755,9 @@ checksum = "5486aed0026218e61b8a01d5fbd5a0a134649abb71a0e53b7bc088529dced86e"
[[package]] [[package]]
name = "memmap2" name = "memmap2"
version = "0.5.10" version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
dependencies = [ dependencies = [
"libc", "libc",
] ]
@ -1886,12 +1869,9 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
[[package]] [[package]]
name = "murmurhash32" name = "murmurhash32"
version = "0.2.0" version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a" checksum = "d9380db4c04d219ac5c51d14996bbf2c2e9a15229771b53f8671eb6c83cf44df"
dependencies = [
"byteorder",
]
[[package]] [[package]]
name = "native-tls" name = "native-tls"
@ -1995,7 +1975,7 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cd21b9f5a1cce3c3515c9ffa85f5c7443e07162dae0ccf4339bb7ca38ad3454" checksum = "7cd21b9f5a1cce3c3515c9ffa85f5c7443e07162dae0ccf4339bb7ca38ad3454"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
"libloading", "libloading",
"nvml-wrapper-sys", "nvml-wrapper-sys",
"static_assertions", "static_assertions",
@ -2042,7 +2022,7 @@ version = "6.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
"libc", "libc",
"once_cell", "once_cell",
"onig_sys", "onig_sys",
@ -2064,7 +2044,7 @@ version = "0.10.52"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56" checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
"cfg-if", "cfg-if",
"foreign-types", "foreign-types",
"libc", "libc",
@ -2190,9 +2170,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]] [[package]]
name = "ownedbytes" name = "ownedbytes"
version = "0.4.0" version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e957eaa64a299f39755416e5b3128c505e9d63a91d0453771ad2ccd3907f8db" checksum = "6e8a72b918ae8198abb3a18c190288123e1d442b6b9a7d709305fd194688b4b7"
dependencies = [ dependencies = [
"stable_deref_trait", "stable_deref_trait",
] ]
@ -2499,7 +2479,7 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
] ]
[[package]] [[package]]
@ -2508,7 +2488,7 @@ version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
] ]
[[package]] [[package]]
@ -2728,11 +2708,24 @@ version = "0.37.19"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
"errno", "errno",
"io-lifetimes", "io-lifetimes",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys 0.3.8",
"windows-sys 0.48.0",
]
[[package]]
name = "rustix"
version = "0.38.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7"
dependencies = [
"bitflags 2.4.0",
"errno",
"libc",
"linux-raw-sys 0.4.8",
"windows-sys 0.48.0", "windows-sys 0.48.0",
] ]
@ -2790,7 +2783,7 @@ version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8" checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
"core-foundation", "core-foundation",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
@ -2946,6 +2939,15 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "sketches-ddsketch"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68a406c1882ed7f29cd5e248c9848a80e7cb6ae0fea82346d2746f2f941c07e1"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "slab" name = "slab"
version = "0.4.8" version = "0.4.8"
@ -3202,26 +3204,24 @@ dependencies = [
[[package]] [[package]]
name = "tantivy" name = "tantivy"
version = "0.19.2" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bb26a6b22c84d8be41d99a14016d6f04d30d8d31a2ea411a8ab553af5cc490d" checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae"
dependencies = [ dependencies = [
"aho-corasick 0.7.20", "aho-corasick 1.0.1",
"arc-swap", "arc-swap",
"async-trait", "async-trait",
"base64 0.13.1", "base64 0.21.2",
"bitpacking", "bitpacking",
"byteorder", "byteorder",
"census", "census",
"crc32fast", "crc32fast",
"crossbeam-channel", "crossbeam-channel",
"downcast-rs", "downcast-rs",
"fail",
"fastdivide", "fastdivide",
"fastfield_codecs", "fs4",
"fs2",
"htmlescape", "htmlescape",
"itertools 0.10.5", "itertools 0.11.0",
"levenshtein_automata", "levenshtein_automata",
"log", "log",
"lru", "lru",
@ -3232,19 +3232,21 @@ dependencies = [
"num_cpus", "num_cpus",
"once_cell", "once_cell",
"oneshot", "oneshot",
"ownedbytes",
"rayon", "rayon",
"regex", "regex",
"rust-stemmers", "rust-stemmers",
"rustc-hash", "rustc-hash",
"serde", "serde",
"serde_json", "serde_json",
"sketches-ddsketch",
"smallvec", "smallvec",
"stable_deref_trait",
"tantivy-bitpacker", "tantivy-bitpacker",
"tantivy-columnar",
"tantivy-common", "tantivy-common",
"tantivy-fst", "tantivy-fst",
"tantivy-query-grammar", "tantivy-query-grammar",
"tantivy-stacker",
"tantivy-tokenizer-api",
"tempfile", "tempfile",
"thiserror", "thiserror",
"time 0.3.26", "time 0.3.26",
@ -3254,18 +3256,40 @@ dependencies = [
[[package]] [[package]]
name = "tantivy-bitpacker" name = "tantivy-bitpacker"
version = "0.3.0" version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e71a0c95b82d4292b097a09b989a6380d28c3a86800c841a2d03bae1fc8b9fa6" checksum = "cecb164321482301f514dd582264fa67f70da2d7eb01872ccd71e35e0d96655a"
dependencies = [
"bitpacking",
]
[[package]]
name = "tantivy-columnar"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d85f8019af9a78b3118c11298b36ffd21c2314bd76bbcd9d12e00124cbb7e70"
dependencies = [
"fastdivide",
"fnv",
"itertools 0.11.0",
"serde",
"tantivy-bitpacker",
"tantivy-common",
"tantivy-sstable",
"tantivy-stacker",
]
[[package]] [[package]]
name = "tantivy-common" name = "tantivy-common"
version = "0.4.0" version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14fef4182bb60df9a4b92cd8ecab39ba2e50a05542934af17eef1f49660705cb" checksum = "af4a3a975e604a2aba6b1106a04505e1e7a025e6def477fab6e410b4126471e1"
dependencies = [ dependencies = [
"async-trait",
"byteorder", "byteorder",
"ownedbytes", "ownedbytes",
"serde",
"time 0.3.26",
] ]
[[package]] [[package]]
@ -3281,13 +3305,41 @@ dependencies = [
[[package]] [[package]]
name = "tantivy-query-grammar" name = "tantivy-query-grammar"
version = "0.19.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "343e3ada4c1c480953f6960f8a21ce9c76611480ffdd4f4e230fdddce0fc5331" checksum = "1d39c5a03100ac10c96e0c8b07538e2ab8b17da56434ab348309b31f23fada77"
dependencies = [ dependencies = [
"combine", "nom 7.1.3",
"once_cell", ]
"regex",
[[package]]
name = "tantivy-sstable"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0c1bb43e5e8b8e05eb8009610344dbf285f06066c844032fbb3e546b3c71df"
dependencies = [
"tantivy-common",
"tantivy-fst",
"zstd 0.12.4",
]
[[package]]
name = "tantivy-stacker"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2c078595413f13f218cf6f97b23dcfd48936838f1d3d13a1016e05acd64ed6c"
dependencies = [
"murmurhash32",
"tantivy-common",
]
[[package]]
name = "tantivy-tokenizer-api"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "347b6fb212b26d3505d224f438e3c4b827ab8bd847fe9953ad5ac6b8f9443b66"
dependencies = [
"serde",
] ]
[[package]] [[package]]
@ -3316,7 +3368,7 @@ dependencies = [
"cfg-if", "cfg-if",
"fastrand", "fastrand",
"redox_syscall 0.3.5", "redox_syscall 0.3.5",
"rustix", "rustix 0.37.19",
"windows-sys 0.45.0", "windows-sys 0.45.0",
] ]
@ -3642,7 +3694,7 @@ version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
"bytes", "bytes",
"futures-core", "futures-core",
"futures-util", "futures-util",
@ -3661,7 +3713,7 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d1d42a9b3f3ec46ba828e8d376aec14592ea199f70a06a548587ecd1c4ab658" checksum = "5d1d42a9b3f3ec46ba828e8d376aec14592ea199f70a06a548587ecd1c4ab658"
dependencies = [ dependencies = [
"bitflags", "bitflags 1.3.2",
"bytes", "bytes",
"futures-core", "futures-core",
"futures-util", "futures-util",
@ -4462,7 +4514,7 @@ dependencies = [
"pbkdf2", "pbkdf2",
"sha1", "sha1",
"time 0.3.26", "time 0.3.26",
"zstd", "zstd 0.11.2+zstd.1.5.2",
] ]
[[package]] [[package]]
@ -4471,7 +4523,16 @@ version = "0.11.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
dependencies = [ dependencies = [
"zstd-safe", "zstd-safe 5.0.2+zstd.1.5.2",
]
[[package]]
name = "zstd"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c"
dependencies = [
"zstd-safe 6.0.6",
] ]
[[package]] [[package]]
@ -4484,6 +4545,16 @@ dependencies = [
"zstd-sys", "zstd-sys",
] ]
[[package]]
name = "zstd-safe"
version = "6.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581"
dependencies = [
"libc",
"zstd-sys",
]
[[package]] [[package]]
name = "zstd-sys" name = "zstd-sys"
version = "2.0.8+zstd.1.5.5" version = "2.0.8+zstd.1.5.5"

View File

@ -29,7 +29,7 @@ tracing = "0.1"
tracing-subscriber = "0.3" tracing-subscriber = "0.3"
anyhow = "1.0.71" anyhow = "1.0.71"
serde-jsonlines = "0.4.0" serde-jsonlines = "0.4.0"
tantivy = "0.19.2" tantivy = "0.21.0"
async-trait = "0.1.72" async-trait = "0.1.72"
reqwest = { version = "0.11.18" } reqwest = { version = "0.11.18" }
derive_builder = "0.12.0" derive_builder = "0.12.0"

View File

@ -5,29 +5,54 @@ use tabby_common::{config::Config, path::index_dir, SourceFile};
use tantivy::{ use tantivy::{
directory::MmapDirectory, directory::MmapDirectory,
doc, doc,
schema::{Schema, STORED, STRING, TEXT}, schema::{Schema, TextFieldIndexing, TextOptions, STORED, STRING},
tokenizer::{RegexTokenizer, RemoveLongFilter, TextAnalyzer},
Index, Index,
}; };
// Magic numbers
static MAX_LINE_LENGTH_THRESHOLD: usize = 300;
static AVG_LINE_LENGTH_THRESHOLD: f32 = 150f32;
pub fn index_repositories(_config: &Config) -> Result<()> { pub fn index_repositories(_config: &Config) -> Result<()> {
let mut builder = Schema::builder(); let mut builder = Schema::builder();
let code_indexing_options = TextFieldIndexing::default()
.set_tokenizer("code")
.set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions);
let code_options = TextOptions::default()
.set_indexing_options(code_indexing_options)
.set_stored();
let field_git_url = builder.add_text_field("git_url", STRING | STORED); let field_git_url = builder.add_text_field("git_url", STRING | STORED);
let field_filepath = builder.add_text_field("filepath", STRING | STORED); let field_filepath = builder.add_text_field("filepath", STRING | STORED);
let field_language = builder.add_text_field("language", STRING | STORED); let field_language = builder.add_text_field("language", STRING | STORED);
let field_name = builder.add_text_field("name", STRING | STORED); let field_name = builder.add_text_field("name", STRING | STORED);
let field_kind = builder.add_text_field("kind", STRING | STORED); let field_kind = builder.add_text_field("kind", STRING | STORED);
let field_body = builder.add_text_field("body", TEXT | STORED); let field_body = builder.add_text_field("body", code_options);
let schema = builder.build(); let schema = builder.build();
fs::create_dir_all(index_dir())?; fs::create_dir_all(index_dir())?;
let directory = MmapDirectory::open(index_dir())?; let directory = MmapDirectory::open(index_dir())?;
let index = Index::open_or_create(directory, schema)?; let index = Index::open_or_create(directory, schema)?;
let code_tokenizer = TextAnalyzer::builder(RegexTokenizer::new(r"(?:\w*)").unwrap())
.filter(RemoveLongFilter::limit(40))
.build();
index.tokenizers().register("code", code_tokenizer);
let mut writer = index.writer(10_000_000)?; let mut writer = index.writer(10_000_000)?;
writer.delete_all_documents()?; writer.delete_all_documents()?;
for file in SourceFile::all()? { for file in SourceFile::all()? {
if file.max_line_length > MAX_LINE_LENGTH_THRESHOLD {
continue;
}
if file.avg_line_length > AVG_LINE_LENGTH_THRESHOLD {
continue;
}
for doc in from_source_file(file) { for doc in from_source_file(file) {
writer.add_document(doc!( writer.add_document(doc!(
field_git_url => doc.git_url, field_git_url => doc.git_url,
@ -141,8 +166,18 @@ mod tests {
assert_eq!(docs[0].name, "ConstantLengthDataset"); assert_eq!(docs[0].name, "ConstantLengthDataset");
assert_eq!(docs[0].kind, "class"); assert_eq!(docs[0].kind, "class");
assert!(
docs[0].body.starts_with("class ConstantLengthDataset"),
"body: {:?}",
docs[0].body
);
assert_eq!(docs[1].name, "__init__"); assert_eq!(docs[1].name, "__init__");
assert_eq!(docs[1].kind, "function"); assert_eq!(docs[1].kind, "function");
assert!(
docs[1].body.starts_with("def __init__"),
"body: {:?}",
docs[1].body
);
} }
} }

View File

@ -1,6 +1,6 @@
use std::collections::HashMap; use std::collections::HashMap;
use anyhow::{anyhow, Result}; use anyhow::Result;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use strfmt::strfmt; use strfmt::strfmt;
use tabby_common::path::index_dir; use tabby_common::path::index_dir;
@ -186,14 +186,8 @@ impl IndexState {
.reader_builder() .reader_builder()
.reload_policy(ReloadPolicy::OnCommit) .reload_policy(ReloadPolicy::OnCommit)
.try_into()?; .try_into()?;
let field_name = index let field_name = index.schema().get_field("name")?;
.schema() let field_body = index.schema().get_field("body")?;
.get_field("name")
.ok_or(anyhow!("Index doesn't have required field"))?;
let field_body = index
.schema()
.get_field("body")
.ok_or(anyhow!("Index doesn't have required field"))?;
let query_parser = QueryParser::for_index(&index, vec![field_body]); let query_parser = QueryParser::for_index(&index, vec![field_body]);
Ok(Self { Ok(Self {
searcher: reader.searcher(), searcher: reader.searcher(),

View File

@ -0,0 +1,30 @@
import requests
import streamlit as st
from typing import NamedTuple
class Doc(NamedTuple):
    """A single search hit: the matched symbol's name, its source body, and its score."""

    name: str
    body: str
    score: float

    @staticmethod
    def from_json(json: dict) -> "Doc":
        """Build a ``Doc`` from one entry of the search response's ``hits`` list.

        The entry is read as ``{"doc": {"name": [...], "body": [...]}, "score": ...}``;
        the first element of each field list is used.

        Raises:
            KeyError: if ``doc``, ``name``, ``body`` or ``score`` is missing.
            IndexError: if a field list is empty.
        """
        doc = json["doc"]
        return Doc(
            name=doc["name"][0],
            body=doc["body"][0],
            score=json["score"],
        )
# force wide mode
st.set_page_config(layout="wide")

query = st.text_input("Query")

if query:
    # Bound the wait so an unreachable server cannot hang the page, and
    # surface HTTP errors explicitly instead of failing later on the payload.
    r = requests.get(
        "http://localhost:3000/api",
        params={"q": query},
        timeout=10,
    )
    r.raise_for_status()
    hits = r.json()["hits"]
    # Render each hit as a "name : score" line followed by its code body.
    for x in hits:
        doc = Doc.from_json(x)
        st.write(f"{doc.name} : {doc.score}")
        st.code(doc.body)