feat: add indexer [TAB-17] (#199)

* add basic indexer

* formatting
docs-add-demo
Meng Zhang 2023-06-05 15:18:10 -07:00 committed by GitHub
parent 272dde9769
commit 249d51d0f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 617 additions and 68 deletions

437
Cargo.lock generated
View File

@ -28,6 +28,17 @@ dependencies = [
"cpufeatures",
]
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "0.7.20"
@ -61,6 +72,15 @@ dependencies = [
"libc",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "anstream"
version = "0.3.2"
@ -116,6 +136,12 @@ version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
[[package]]
name = "arc-swap"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6"
[[package]]
name = "async-trait"
version = "0.1.68"
@ -221,6 +247,15 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitpacking"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7"
dependencies = [
"crunchy",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
@ -300,6 +335,12 @@ dependencies = [
"jobserver",
]
[[package]]
name = "census"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fafee10a5dd1cffcb5cc560e0d0df8803d7355a2b12272e3557dee57314cb6e"
[[package]]
name = "cfg-if"
version = "1.0.0"
@ -398,6 +439,15 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "combine"
version = "4.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
dependencies = [
"memchr",
]
[[package]]
name = "console"
version = "0.15.7"
@ -505,6 +555,12 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "crypto-common"
version = "0.1.6"
@ -670,6 +726,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "downcast-rs"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650"
[[package]]
name = "either"
version = "1.8.1"
@ -730,6 +792,37 @@ dependencies = [
"cc",
]
[[package]]
name = "fail"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c"
dependencies = [
"log",
"once_cell",
"rand",
]
[[package]]
name = "fastdivide"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04"
[[package]]
name = "fastfield_codecs"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "374a3a53c1bd5fb31b10084229290eafb0a05f260ec90f1f726afffda4877a8a"
dependencies = [
"fastdivide",
"itertools 0.10.5",
"log",
"ownedbytes",
"tantivy-bitpacker",
"tantivy-common",
]
[[package]]
name = "fastrand"
version = "1.9.0"
@ -871,6 +964,19 @@ dependencies = [
"slab",
]
[[package]]
name = "generator"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3e123d9ae7c02966b4d892e550bdc32164f05853cd40ab570650ad600596a8a"
dependencies = [
"cc",
"libc",
"log",
"rustversion",
"windows",
]
[[package]]
name = "generic-array"
version = "0.14.7"
@ -928,6 +1034,9 @@ name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
]
[[package]]
name = "heck"
@ -959,6 +1068,12 @@ dependencies = [
"digest",
]
[[package]]
name = "htmlescape"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
[[package]]
name = "http"
version = "0.2.9"
@ -1138,6 +1253,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
"js-sys",
"wasm-bindgen",
"web-sys",
]
[[package]]
@ -1187,6 +1305,15 @@ dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.6"
@ -1228,6 +1355,12 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "levenshtein_automata"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25"
[[package]]
name = "libc"
version = "0.2.144"
@ -1268,6 +1401,35 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "loom"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5"
dependencies = [
"cfg-if",
"generator",
"pin-utils",
"scoped-tls",
"tracing",
"tracing-subscriber 0.3.17",
]
[[package]]
name = "lru"
version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a"
dependencies = [
"hashbrown",
]
[[package]]
name = "lz4_flex"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a8cbbb2831780bc3b9c15a41f5b49222ef756b6730a95f3decfdd15903eb5a3"
[[package]]
name = "macro_rules_attribute"
version = "0.1.3"
@ -1284,18 +1446,55 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
[[package]]
name = "matchers"
version = "0.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1"
dependencies = [
"regex-automata",
]
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata",
]
[[package]]
name = "matchit"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40"
[[package]]
name = "measure_time"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56220900f1a0923789ecd6bf25fbae8af3b2f1ff3e9e297fc9b6b8674dd4d852"
dependencies = [
"instant",
"log",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memmap2"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327"
dependencies = [
"libc",
]
[[package]]
name = "memoffset"
version = "0.8.0"
@ -1378,6 +1577,15 @@ dependencies = [
"syn 2.0.18",
]
[[package]]
name = "murmurhash32"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a"
dependencies = [
"byteorder",
]
[[package]]
name = "native-tls"
version = "0.2.11"
@ -1471,6 +1679,15 @@ version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "oneshot"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc22d22931513428ea6cc089e942d38600e3d00976eef8c86de6b8a3aadec6eb"
dependencies = [
"loom",
]
[[package]]
name = "onig"
version = "6.4.0"
@ -1543,6 +1760,15 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "ownedbytes"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e957eaa64a299f39755416e5b3128c505e9d63a91d0453771ad2ccd3907f8db"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "parking_lot"
version = "0.12.1"
@ -1805,6 +2031,15 @@ dependencies = [
"regex-syntax 0.7.2",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
@ -1917,12 +2152,28 @@ dependencies = [
"walkdir",
]
[[package]]
name = "rust-stemmers"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
dependencies = [
"serde",
"serde_derive",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustix"
version = "0.37.19"
@ -1967,6 +2218,12 @@ dependencies = [
"windows-sys 0.42.0",
]
[[package]]
name = "scoped-tls"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -2162,6 +2419,12 @@ dependencies = [
"unicode-segmentation",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "strfmt"
version = "0.2.4"
@ -2254,7 +2517,7 @@ dependencies = [
"tower",
"tower-http",
"tracing",
"tracing-subscriber",
"tracing-subscriber 0.3.17",
"utoipa",
"utoipa-swagger-ui",
"uuid 1.3.3",
@ -2265,6 +2528,7 @@ name = "tabby-common"
version = "0.1.0"
dependencies = [
"chrono",
"filenamify",
"lazy_static",
"serde",
"serdeconv",
@ -2287,11 +2551,105 @@ dependencies = [
name = "tabby-scheduler"
version = "0.1.0"
dependencies = [
"anyhow",
"filenamify",
"job_scheduler",
"tabby-common",
"tantivy",
"temp_testdir",
"tracing",
"tracing-test",
"walkdir",
]
[[package]]
name = "tantivy"
version = "0.19.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bb26a6b22c84d8be41d99a14016d6f04d30d8d31a2ea411a8ab553af5cc490d"
dependencies = [
"aho-corasick 0.7.20",
"arc-swap",
"async-trait",
"base64 0.13.1",
"bitpacking",
"byteorder",
"census",
"crc32fast",
"crossbeam-channel",
"downcast-rs",
"fail",
"fastdivide",
"fastfield_codecs",
"fs2",
"htmlescape",
"itertools 0.10.5",
"levenshtein_automata",
"log",
"lru",
"lz4_flex",
"measure_time",
"memmap2",
"murmurhash32",
"num_cpus",
"once_cell",
"oneshot",
"ownedbytes",
"rayon",
"regex",
"rust-stemmers",
"rustc-hash",
"serde",
"serde_json",
"smallvec",
"stable_deref_trait",
"tantivy-bitpacker",
"tantivy-common",
"tantivy-fst",
"tantivy-query-grammar",
"tempfile",
"thiserror",
"time 0.3.21",
"uuid 1.3.3",
"winapi",
]
[[package]]
name = "tantivy-bitpacker"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e71a0c95b82d4292b097a09b989a6380d28c3a86800c841a2d03bae1fc8b9fa6"
[[package]]
name = "tantivy-common"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14fef4182bb60df9a4b92cd8ecab39ba2e50a05542934af17eef1f49660705cb"
dependencies = [
"byteorder",
"ownedbytes",
]
[[package]]
name = "tantivy-fst"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc3c506b1a8443a3a65352df6382a1fb6a7afe1a02e871cee0d25e2c3d5f3944"
dependencies = [
"byteorder",
"regex-syntax 0.6.29",
"utf8-ranges",
]
[[package]]
name = "tantivy-query-grammar"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "343e3ada4c1c480953f6960f8a21ce9c76611480ffdd4f4e230fdddce0fc5331"
dependencies = [
"combine",
"once_cell",
"regex",
]
[[package]]
@ -2380,8 +2738,10 @@ version = "0.3.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc"
dependencies = [
"itoa",
"serde",
"time-core",
"time-macros",
]
[[package]]
@ -2390,6 +2750,15 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb"
[[package]]
name = "time-macros"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b"
dependencies = [
"time-core",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
@ -2620,20 +2989,79 @@ dependencies = [
"tracing-core",
]
[[package]]
name = "tracing-serde"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1"
dependencies = [
"serde",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e0d2eaa99c3c2e41547cfa109e910a68ea03823cccad4a0525dcbc9b01e8c71"
dependencies = [
"ansi_term",
"chrono",
"lazy_static",
"matchers 0.0.1",
"regex",
"serde",
"serde_json",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
"tracing-serde",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
dependencies = [
"matchers 0.1.0",
"nu-ansi-term",
"once_cell",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]
[[package]]
name = "tracing-test"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3b48778c2d401c6a7fcf38a0e3c55dc8e8e753cbd381044a8cdb6fd69a29f53"
dependencies = [
"lazy_static",
"tracing-core",
"tracing-subscriber 0.2.25",
"tracing-test-macro",
]
[[package]]
name = "tracing-test-macro"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c49adbab879d2e0dd7f75edace5f0ac2156939ecb7e6a1e8fa14e53728328c48"
dependencies = [
"lazy_static",
"quote",
"syn 1.0.109",
]
[[package]]
name = "trackable"
version = "1.3.0"
@ -2733,6 +3161,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "utf8-ranges"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba"
[[package]]
name = "utf8parse"
version = "0.2.1"
@ -2796,6 +3230,7 @@ checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2"
dependencies = [
"getrandom",
"rand",
"serde",
"uuid-macro-internal",
]

View File

@ -22,3 +22,4 @@ tokio = "1.28"
tokio-util = "0.7"
tracing = "0.1"
tracing-subscriber = "0.3"
anyhow = "1.0.71"

View File

@ -5,6 +5,7 @@ edition = "2021"
[dependencies]
chrono = "0.4.26"
filenamify = "0.1.0"
lazy_static = { workspace = true }
serde = { workspace = true }
serdeconv = { workspace = true }

View File

@ -1,26 +1,28 @@
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use filenamify::filenamify;
use serde::Deserialize;
use crate::path::repositories_dir;
#[derive(Deserialize)]
#[cfg_attr(feature = "testutils", derive(Serialize))]
pub struct Config {
pub repositories: Vec<Repository>,
}
impl Config {
pub fn load() -> Self {
pub fn load() -> Result<Self, serdeconv::Error> {
serdeconv::from_toml_file(crate::path::config_file().as_path())
.expect("Failed to read config file")
}
#[cfg(feature = "testutils")]
pub fn save(&self) {
let config_file = crate::path::config_file();
std::fs::create_dir_all(config_file.parent().unwrap()).unwrap();
serdeconv::to_toml_file(self, config_file).expect("Failed to write config file")
}
}
#[derive(Serialize, Deserialize)]
#[derive(Deserialize)]
pub struct Repository {
pub git_url: String,
}
impl Repository {
    /// Returns the local directory associated with this repository.
    ///
    /// The directory name is the repository's `git_url` passed through
    /// `filenamify`, placed under `repositories_dir()`.
    pub fn dir(&self) -> PathBuf {
        let base = repositories_dir();
        let dir_name = filenamify(&self.git_url);
        base.join(dir_name)
    }
}

View File

@ -31,6 +31,10 @@ pub fn repositories_dir() -> PathBuf {
tabby_root().join("repositories")
}
/// Returns the `index` directory located under the Tabby root directory.
pub fn index_dir() -> PathBuf {
    let root = tabby_root();
    root.join("index")
}
/// Returns the `models` directory located under the Tabby root directory.
pub fn models_dir() -> PathBuf {
    let root = tabby_root();
    root.join("models")
}

View File

@ -8,6 +8,6 @@ tabby-common = { path = "../tabby-common" }
indicatif = "0.17.3"
futures-util = "0.3.28"
reqwest = { version = "0.11.18", features = ["stream", "json"] }
anyhow = "1.0.71"
anyhow = { workspace = true }
serde = { workspace = true }
serdeconv = { workspace = true }

View File

@ -6,11 +6,15 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = { workspace = true }
filenamify = "0.1.0"
job_scheduler = "1.2.1"
tabby-common = { path = "../tabby-common" }
tantivy = "0.19.2"
tracing = { workspace = true }
walkdir = "2.3.3"
[dev-dependencies]
temp_testdir = "0.2"
tabby-common = { path = "../tabby-common", features = [ "testutils" ] }
tracing-test = "0.1"

View File

@ -0,0 +1,83 @@
use std::fs::{self, read_to_string};
use anyhow::Result;
use tabby_common::{
config::{Config, Repository},
path::index_dir,
};
use tantivy::{
directory::MmapDirectory,
doc,
schema::{Schema, STORED, STRING, TEXT},
Index, IndexWriter,
};
use tracing::{info, warn};
use walkdir::{DirEntry, WalkDir};
/// Indexing support for [`Repository`].
trait RepositoryExt {
    /// Adds one document per readable text file of this repository to `writer`.
    ///
    /// `schema` must define the `git_url`, `filepath` and `content` fields.
    /// The caller is responsible for committing `writer`.
    ///
    /// # Panics
    ///
    /// Panics if `schema` lacks one of the fields above, or if a document
    /// cannot be handed to `writer`.
    fn index(&self, schema: &Schema, writer: &mut IndexWriter);
}

impl RepositoryExt for Repository {
    fn index(&self, schema: &Schema, writer: &mut IndexWriter) {
        let git_url = schema
            .get_field("git_url")
            .expect("schema is missing the `git_url` field");
        let filepath = schema
            .get_field("filepath")
            .expect("schema is missing the `filepath` field");
        let content = schema
            .get_field("content")
            .expect("schema is missing the `content` field");

        let dir = self.dir();

        info!("Start indexing repository {}", self.git_url);
        let files = WalkDir::new(dir.as_path())
            .into_iter()
            // Prunes hidden files and whole hidden directories (e.g. `.git`).
            .filter_entry(is_not_hidden)
            // Report traversal failures (missing checkout, permission denied, ...)
            // instead of dropping them silently, then keep walking.
            .filter_map(|entry| match entry {
                Ok(entry) => Some(entry),
                Err(err) => {
                    warn!("Skip unreadable entry: {}", err);
                    None
                }
            })
            .filter(|entry| !entry.file_type().is_dir());

        for entry in files {
            let relative_path = entry
                .path()
                .strip_prefix(dir.as_path())
                .expect("walked entries are rooted at the repository directory");

            // `read_to_string` fails on unreadable or non-UTF-8 files; those are skipped.
            if let Ok(file_content) = read_to_string(entry.path()) {
                info!("Indexing {:?}", relative_path);
                writer
                    .add_document(doc!(
                        git_url => self.git_url.clone(),
                        filepath => relative_path.display().to_string(),
                        content => file_content,
                    ))
                    .expect("failed to add document to the index writer");
            } else {
                warn!("Skip {:?}", relative_path);
            }
        }
    }
}
/// Tells the directory walker whether `entry` should be visited.
///
/// The walk root (depth 0) is accepted whatever its name; any other entry is
/// accepted only when its file name does not start with `.`. Entries whose
/// file name is not valid UTF-8 are rejected.
fn is_not_hidden(entry: &DirEntry) -> bool {
    match entry.file_name().to_str() {
        Some(name) => entry.depth() == 0 || !name.starts_with('.'),
        None => false,
    }
}
/// Builds the index schema: `git_url` and `filepath` as `STRING | STORED`
/// fields, followed by `content` as a `TEXT | STORED` field.
fn create_schema() -> Schema {
    let mut schema_builder = Schema::builder();

    for field_name in ["git_url", "filepath"] {
        schema_builder.add_text_field(field_name, STRING | STORED);
    }
    schema_builder.add_text_field("content", TEXT | STORED);

    schema_builder.build()
}
/// Rebuilds the index in `index_dir()` from every repository in `config`.
///
/// All existing documents are deleted and each repository is re-indexed;
/// both steps are submitted to the same writer before a single commit.
///
/// # Panics
///
/// Panics if the index directory cannot be created or opened, if the index
/// or its writer cannot be created, or if the final commit fails.
pub fn index_repositories(config: &Config) {
    let schema = create_schema();
    let index_path = index_dir();

    fs::create_dir_all(&index_path).expect("Failed to create index directory");

    let directory = MmapDirectory::open(&index_path).expect("Failed to open index directory");
    let index =
        Index::open_or_create(directory, schema.clone()).expect("Failed to open or create index");
    // NOTE(review): 10_000_000 is the value handed to `Index::writer`, presumably
    // its memory budget in bytes; confirm against the tantivy docs.
    let mut writer = index
        .writer(10_000_000)
        .expect("Failed to create index writer");

    writer
        .delete_all_documents()
        .expect("Failed to delete existing documents");

    for repository in &config.repositories {
        repository.index(&schema, &mut writer);
    }

    writer.commit().expect("Failed to commit index");
}

View File

@ -1,23 +1,67 @@
mod index;
mod repository;
use std::time::Duration;
use job_scheduler::{Job, JobScheduler};
use tracing::info;
use tabby_common::config::Config;
use tracing::{error, info};
pub fn scheduler() {
pub fn scheduler(now: bool) {
let config = Config::load();
if config.is_err() {
error!("Please create config.toml before using scheduler");
return;
}
let config = config.unwrap();
let mut scheduler = JobScheduler::new();
// Every 5 hours.
scheduler.add(Job::new("* * 1/5 * * *".parse().unwrap(), || {
let job = || {
info!("Syncing repositories...");
repository::sync_repositories();
}));
repository::sync_repositories(&config);
info!("Indexing repositories...");
index::index_repositories(&config);
};
if now {
job()
} else {
// Every 5 hours.
scheduler.add(Job::new("0 0 1/5 * * * *".parse().unwrap(), job));
info!("Scheduler activated...");
loop {
info!("Checking for jobs in queue...");
scheduler.tick();
std::thread::sleep(Duration::from_secs(10));
let duration = scheduler.time_till_next_job();
info!("Sleep {:?} for next job ...", duration);
std::thread::sleep(duration);
}
}
}
#[cfg(test)]
mod tests {
    use tabby_common::{
        config::{Config, Repository},
        path::set_tabby_root,
    };
    use temp_testdir::*;
    use tracing_test::traced_test;

    use super::*;

    /// Syncs a repository given by URL and then indexes it, using a
    /// temporary directory as the Tabby root.
    #[traced_test]
    #[test]
    fn end_to_end() {
        // Bind the guard to a local so it lives until the end of the test.
        // Calling `TempDir::default().to_path_buf()` in one expression drops the
        // guard immediately, which removes the directory before it is used and
        // leaves anything the test later creates at that path behind.
        let root = TempDir::default();
        set_tabby_root(root.to_path_buf());

        let config = Config {
            repositories: vec![Repository {
                git_url: "https://github.com/TabbyML/interview-questions".to_owned(),
            }],
        };

        repository::sync_repositories(&config);
        index::index_repositories(&config);
    }
}

View File

@ -1,10 +1,6 @@
use std::{path::PathBuf, process::Command};
use std::process::Command;
use filenamify::filenamify;
use tabby_common::{
config::{Config, Repository},
path::repositories_dir,
};
use tabby_common::config::{Config, Repository};
trait ConfigExt {
fn sync_repositories(&self);
@ -19,15 +15,10 @@ impl ConfigExt for Config {
}
trait RepositoryExt {
fn dir(&self) -> PathBuf;
fn sync(&self);
}
impl RepositoryExt for Repository {
fn dir(&self) -> PathBuf {
repositories_dir().join(filenamify(&self.git_url))
}
fn sync(&self) {
let dir = self.dir();
let dir_string = dir.display().to_string();
@ -62,32 +53,6 @@ impl RepositoryExt for Repository {
}
}
pub fn sync_repositories() {
let config = Config::load();
pub fn sync_repositories(config: &Config) {
config.sync_repositories();
}
#[cfg(test)]
mod tests {
use tabby_common::{
config::{Config, Repository},
path::set_tabby_root,
};
use temp_testdir::*;
use super::*;
#[test]
fn it_works() {
set_tabby_root(TempDir::default().to_path_buf());
let config = Config {
repositories: vec![Repository {
git_url: "https://github.com/TabbyML/interview-questions".to_owned(),
}],
};
config.save();
sync_repositories();
}
}

View File

@ -2,6 +2,7 @@ mod download;
mod serve;
use clap::{Parser, Subcommand};
use tracing_subscriber::EnvFilter;
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
@ -20,18 +21,27 @@ pub enum Commands {
Download(download::DownloadArgs),
/// Starts the scheduler process.
Scheduler,
Scheduler(SchedulerArgs),
}
// Command-line arguments accepted by the `Scheduler` subcommand.
// Plain `//` comments are used here on purpose: `///` comments on clap-derive
// items are picked up as help text.
#[derive(clap::Args)]
pub struct SchedulerArgs {
/// If true, runs scheduler jobs immediately.
// Defaults to false; `main` forwards this value to `tabby_scheduler::scheduler`.
#[clap(long, default_value_t = false)]
now: bool,
}
#[tokio::main]
async fn main() {
tracing_subscriber::fmt::init();
tracing_subscriber::fmt::fmt()
.with_env_filter(EnvFilter::from_default_env().add_directive("tabby=info".parse().unwrap()))
.init();
let cli = Cli::parse();
match &cli.command {
Commands::Serve(args) => serve::main(args).await,
Commands::Download(args) => download::main(args).await,
Commands::Scheduler => tabby_scheduler::scheduler(),
Commands::Scheduler(args) => tabby_scheduler::scheduler(args.now),
}
}