From 249d51d0f5582cd343b2016d8e463666be6e0ac6 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Mon, 5 Jun 2023 15:18:10 -0700 Subject: [PATCH] feat: add indexer [TAB-17] (#199) * add basic indexer * formatting --- Cargo.lock | 437 ++++++++++++++++++++++- Cargo.toml | 1 + crates/tabby-common/Cargo.toml | 1 + crates/tabby-common/src/config.rs | 26 +- crates/tabby-common/src/path.rs | 4 + crates/tabby-download/Cargo.toml | 2 +- crates/tabby-scheduler/Cargo.toml | 4 + crates/tabby-scheduler/src/index.rs | 83 +++++ crates/tabby-scheduler/src/lib.rs | 70 +++- crates/tabby-scheduler/src/repository.rs | 41 +-- crates/tabby/src/main.rs | 16 +- 11 files changed, 617 insertions(+), 68 deletions(-) create mode 100644 crates/tabby-scheduler/src/index.rs diff --git a/Cargo.lock b/Cargo.lock index e7ba231..ebd2492 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,6 +28,17 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.20" @@ -61,6 +72,15 @@ dependencies = [ "libc", ] +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "anstream" version = "0.3.2" @@ -116,6 +136,12 @@ version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" +[[package]] +name = "arc-swap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" + [[package]] name = "async-trait" version = "0.1.68" @@ -221,6 +247,15 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitpacking" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" +dependencies = [ + "crunchy", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -300,6 +335,12 @@ dependencies = [ "jobserver", ] +[[package]] +name = "census" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fafee10a5dd1cffcb5cc560e0d0df8803d7355a2b12272e3557dee57314cb6e" + [[package]] name = "cfg-if" version = "1.0.0" @@ -398,6 +439,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "combine" +version = "4.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" +dependencies = [ + "memchr", +] + [[package]] name = "console" version = "0.15.7" @@ -505,6 +555,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-common" version = "0.1.6" @@ -670,6 +726,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "downcast-rs" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" + [[package]] name = "either" version = "1.8.1" @@ -730,6 +792,37 @@ dependencies = [ "cc", ] +[[package]] +name = "fail" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c" +dependencies = [ + "log", + "once_cell", + "rand", +] + +[[package]] +name = "fastdivide" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04" + +[[package]] +name = "fastfield_codecs" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "374a3a53c1bd5fb31b10084229290eafb0a05f260ec90f1f726afffda4877a8a" +dependencies = [ + "fastdivide", + "itertools 0.10.5", + "log", + "ownedbytes", + "tantivy-bitpacker", + "tantivy-common", +] + [[package]] name = "fastrand" version = "1.9.0" @@ -871,6 +964,19 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3e123d9ae7c02966b4d892e550bdc32164f05853cd40ab570650ad600596a8a" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -928,6 +1034,9 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] [[package]] name = "heck" @@ -959,6 +1068,12 @@ dependencies = [ "digest", ] +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + [[package]] name = "http" version = "0.2.9" @@ -1138,6 +1253,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", ] [[package]] @@ -1187,6 +1305,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.6" @@ -1228,6 +1355,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "levenshtein_automata" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + [[package]] name = "libc" version = "0.2.144" @@ -1268,6 +1401,35 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if", + "generator", + "pin-utils", + "scoped-tls", + "tracing", + "tracing-subscriber 0.3.17", +] + +[[package]] +name = "lru" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "lz4_flex" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a8cbbb2831780bc3b9c15a41f5b49222ef756b6730a95f3decfdd15903eb5a3" + [[package]] name = "macro_rules_attribute" version = "0.1.3" @@ -1284,18 +1446,55 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d" +[[package]] +name = "matchers" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata", +] + [[package]] name = "matchit" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40" +[[package]] +name = "measure_time" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56220900f1a0923789ecd6bf25fbae8af3b2f1ff3e9e297fc9b6b8674dd4d852" +dependencies = [ + "instant", + "log", +] + [[package]] name = "memchr" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memmap2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.8.0" @@ -1378,6 +1577,15 @@ dependencies = [ "syn 2.0.18", ] +[[package]] +name = "murmurhash32" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a" +dependencies = [ + "byteorder", +] + [[package]] name = "native-tls" version = "0.2.11" @@ -1471,6 +1679,15 @@ version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +[[package]] +name = "oneshot" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc22d22931513428ea6cc089e942d38600e3d00976eef8c86de6b8a3aadec6eb" +dependencies = [ + "loom", +] + [[package]] name = "onig" version = "6.4.0" @@ -1543,6 +1760,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "ownedbytes" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e957eaa64a299f39755416e5b3128c505e9d63a91d0453771ad2ccd3907f8db" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "parking_lot" version = "0.12.1" @@ -1805,6 +2031,15 @@ dependencies = [ "regex-syntax 0.7.2", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + [[package]] name = "regex-syntax" version = "0.6.29" @@ -1917,12 +2152,28 @@ dependencies = [ "walkdir", ] +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rustc-demangle" version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustix" version = "0.37.19" @@ -1967,6 +2218,12 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.1.0" @@ -2162,6 +2419,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strfmt" version = "0.2.4" @@ -2254,7 +2517,7 @@ dependencies = [ "tower", "tower-http", "tracing", - "tracing-subscriber", + "tracing-subscriber 0.3.17", "utoipa", "utoipa-swagger-ui", "uuid 1.3.3", @@ -2265,6 +2528,7 @@ name = "tabby-common" version = "0.1.0" dependencies = [ "chrono", + "filenamify", "lazy_static", "serde", "serdeconv", @@ -2287,11 +2551,105 @@ dependencies = [ name = "tabby-scheduler" version = "0.1.0" dependencies = [ + "anyhow", "filenamify", "job_scheduler", "tabby-common", + "tantivy", "temp_testdir", "tracing", + "tracing-test", + "walkdir", +] + +[[package]] +name = "tantivy" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb26a6b22c84d8be41d99a14016d6f04d30d8d31a2ea411a8ab553af5cc490d" +dependencies = [ + "aho-corasick 0.7.20", + "arc-swap", + "async-trait", + "base64 0.13.1", + "bitpacking", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fail", + "fastdivide", + "fastfield_codecs", + "fs2", + "htmlescape", + "itertools 0.10.5", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "murmurhash32", + "num_cpus", + "once_cell", + "oneshot", + "ownedbytes", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash", + "serde", + "serde_json", + "smallvec", + "stable_deref_trait", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tempfile", + "thiserror", + "time 0.3.21", + "uuid 1.3.3", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e71a0c95b82d4292b097a09b989a6380d28c3a86800c841a2d03bae1fc8b9fa6" + +[[package]] +name = "tantivy-common" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14fef4182bb60df9a4b92cd8ecab39ba2e50a05542934af17eef1f49660705cb" +dependencies = [ + "byteorder", + "ownedbytes", +] + +[[package]] +name = "tantivy-fst" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc3c506b1a8443a3a65352df6382a1fb6a7afe1a02e871cee0d25e2c3d5f3944" +dependencies = [ + "byteorder", + "regex-syntax 0.6.29", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343e3ada4c1c480953f6960f8a21ce9c76611480ffdd4f4e230fdddce0fc5331" +dependencies = [ + "combine", + "once_cell", + "regex", ] [[package]] @@ -2380,8 +2738,10 @@ version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc" dependencies = [ + "itoa", "serde", "time-core", + "time-macros", ] [[package]] @@ -2390,6 +2750,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +[[package]] +name = "time-macros" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b" +dependencies = [ + "time-core", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -2620,20 +2989,79 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e0d2eaa99c3c2e41547cfa109e910a68ea03823cccad4a0525dcbc9b01e8c71" +dependencies = [ + "ansi_term", + "chrono", + "lazy_static", + "matchers 0.0.1", + "regex", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", +] + [[package]] name = "tracing-subscriber" version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" dependencies = [ + "matchers 0.1.0", "nu-ansi-term", + "once_cell", + "regex", "sharded-slab", "smallvec", "thread_local", + "tracing", "tracing-core", "tracing-log", ] +[[package]] +name = "tracing-test" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3b48778c2d401c6a7fcf38a0e3c55dc8e8e753cbd381044a8cdb6fd69a29f53" +dependencies = [ + "lazy_static", + "tracing-core", + "tracing-subscriber 0.2.25", + "tracing-test-macro", +] + +[[package]] +name = "tracing-test-macro" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c49adbab879d2e0dd7f75edace5f0ac2156939ecb7e6a1e8fa14e53728328c48" +dependencies = [ + "lazy_static", + "quote", + "syn 1.0.109", +] + [[package]] name = "trackable" version = "1.3.0" @@ -2733,6 +3161,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + [[package]] name = "utf8parse" version = "0.2.1" @@ -2796,6 +3230,7 @@ checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" dependencies = [ "getrandom", "rand", + "serde", "uuid-macro-internal", ] diff --git a/Cargo.toml b/Cargo.toml index e8994c9..16964d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,3 +22,4 @@ tokio = "1.28" tokio-util = "0.7" tracing = "0.1" tracing-subscriber = "0.3" +anyhow = "1.0.71" diff --git a/crates/tabby-common/Cargo.toml b/crates/tabby-common/Cargo.toml index e6890d8..1a705c7 100644 --- a/crates/tabby-common/Cargo.toml +++ b/crates/tabby-common/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] chrono = "0.4.26" +filenamify = "0.1.0" lazy_static = { workspace = true } serde = { workspace = true } serdeconv = { workspace = true } diff --git a/crates/tabby-common/src/config.rs b/crates/tabby-common/src/config.rs index a89370e..8b4df9a 100644 --- a/crates/tabby-common/src/config.rs +++ b/crates/tabby-common/src/config.rs @@ -1,26 +1,28 @@ -use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +use filenamify::filenamify; +use serde::Deserialize; + +use crate::path::repositories_dir; #[derive(Deserialize)] -#[cfg_attr(feature = "testutils", derive(Serialize))] pub struct Config { pub repositories: Vec, } impl Config { - pub fn load() -> Self { + pub fn load() -> Result { serdeconv::from_toml_file(crate::path::config_file().as_path()) - .expect("Failed to read config file") - } - - #[cfg(feature = "testutils")] - pub fn save(&self) { - let config_file = crate::path::config_file(); - std::fs::create_dir_all(config_file.parent().unwrap()).unwrap(); - serdeconv::to_toml_file(self, config_file).expect("Failed to write config file") } } -#[derive(Serialize, Deserialize)] +#[derive(Deserialize)] pub struct Repository { pub git_url: String, } + +impl Repository { + pub fn dir(&self) -> PathBuf { + repositories_dir().join(filenamify(&self.git_url)) + } +} diff --git a/crates/tabby-common/src/path.rs b/crates/tabby-common/src/path.rs index 56c28a0..f17ff54 100644 --- a/crates/tabby-common/src/path.rs +++ b/crates/tabby-common/src/path.rs @@ -31,6 +31,10 @@ pub fn repositories_dir() -> PathBuf { tabby_root().join("repositories") } +pub fn index_dir() -> PathBuf { + tabby_root().join("index") +} + pub fn models_dir() -> PathBuf { tabby_root().join("models") } diff --git a/crates/tabby-download/Cargo.toml b/crates/tabby-download/Cargo.toml index 68d9e12..fd52341 100644 --- a/crates/tabby-download/Cargo.toml +++ b/crates/tabby-download/Cargo.toml @@ -8,6 +8,6 @@ tabby-common = { path = "../tabby-common" } indicatif = "0.17.3" futures-util = "0.3.28" reqwest = { version = "0.11.18", features = ["stream", "json"] } -anyhow = "1.0.71" +anyhow = { workspace = true } serde = { workspace = true } serdeconv = { workspace = true } diff --git a/crates/tabby-scheduler/Cargo.toml b/crates/tabby-scheduler/Cargo.toml index fc4b1d5..fb97f8c 100644 --- a/crates/tabby-scheduler/Cargo.toml +++ b/crates/tabby-scheduler/Cargo.toml @@ -6,11 +6,15 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +anyhow = { workspace = true } filenamify = "0.1.0" job_scheduler = "1.2.1" tabby-common = { path = "../tabby-common" } +tantivy = "0.19.2" tracing = { workspace = true } +walkdir = "2.3.3" [dev-dependencies] temp_testdir = "0.2" tabby-common = { path = "../tabby-common", features = [ "testutils" ] } +tracing-test = "0.1" diff --git a/crates/tabby-scheduler/src/index.rs b/crates/tabby-scheduler/src/index.rs new file mode 100644 index 0000000..9aeb40a --- /dev/null +++ b/crates/tabby-scheduler/src/index.rs @@ -0,0 +1,83 @@ +use std::fs::{self, read_to_string}; + +use anyhow::Result; +use tabby_common::{ + config::{Config, Repository}, + path::index_dir, +}; +use tantivy::{ + directory::MmapDirectory, + doc, + schema::{Schema, STORED, STRING, TEXT}, + Index, IndexWriter, +}; +use tracing::{info, warn}; +use walkdir::{DirEntry, WalkDir}; + +trait RepositoryExt { + fn index(&self, schema: &Schema, writer: &mut IndexWriter); +} + +impl RepositoryExt for Repository { + fn index(&self, schema: &Schema, writer: &mut IndexWriter) { + let git_url = schema.get_field("git_url").unwrap(); + let filepath = schema.get_field("filepath").unwrap(); + let content = schema.get_field("content").unwrap(); + let dir = self.dir(); + + info!("Start indexing repository {}", self.git_url); + let walk_dir = WalkDir::new(dir.as_path()) + .into_iter() + .filter_entry(is_not_hidden) + .filter_map(Result::ok) + .filter(|e| !e.file_type().is_dir()); + + for entry in walk_dir { + let relative_path = entry.path().strip_prefix(dir.as_path()).unwrap(); + if let Ok(file_content) = read_to_string(entry.path()) { + info!("Indexing {:?}", relative_path); + writer + .add_document(doc!( + git_url => self.git_url.clone(), + filepath => relative_path.display().to_string(), + content => file_content, + )) + .unwrap(); + } else { + warn!("Skip {:?}", relative_path); + } + } + } +} + +fn is_not_hidden(entry: &DirEntry) -> bool { + entry + .file_name() + .to_str() + .map(|s| entry.depth() == 0 || !s.starts_with('.')) + .unwrap_or(false) +} + +fn create_schema() -> Schema { + let mut builder = Schema::builder(); + builder.add_text_field("git_url", STRING | STORED); + builder.add_text_field("filepath", STRING | STORED); + builder.add_text_field("content", TEXT | STORED); + builder.build() +} + +pub fn index_repositories(config: &Config) { + let schema = create_schema(); + + fs::create_dir_all(index_dir()).unwrap(); + let directory = MmapDirectory::open(index_dir()).unwrap(); + let index = Index::open_or_create(directory, schema.clone()).unwrap(); + let mut writer = index.writer(10_000_000).unwrap(); + + writer.delete_all_documents().unwrap(); + for repository in config.repositories.as_slice() { + repository.index(&schema, &mut writer); + } + + writer.commit().unwrap(); +} diff --git a/crates/tabby-scheduler/src/lib.rs b/crates/tabby-scheduler/src/lib.rs index d9bb2e8..9706fab 100644 --- a/crates/tabby-scheduler/src/lib.rs +++ b/crates/tabby-scheduler/src/lib.rs @@ -1,23 +1,67 @@ +mod index; mod repository; -use std::time::Duration; - use job_scheduler::{Job, JobScheduler}; -use tracing::info; +use tabby_common::config::Config; +use tracing::{error, info}; -pub fn scheduler() { +pub fn scheduler(now: bool) { + let config = Config::load(); + if config.is_err() { + error!("Please create config.toml before using scheduler"); + return; + } + + let config = config.unwrap(); let mut scheduler = JobScheduler::new(); - // Every 5 hours. - scheduler.add(Job::new("* * 1/5 * * *".parse().unwrap(), || { + let job = || { info!("Syncing repositories..."); - repository::sync_repositories(); - })); + repository::sync_repositories(&config); - info!("Scheduler activated..."); - loop { - info!("Checking for jobs in queue..."); - scheduler.tick(); - std::thread::sleep(Duration::from_secs(10)); + info!("Indexing repositories..."); + index::index_repositories(&config); + }; + + if now { + job() + } else { + // Every 5 hours. + scheduler.add(Job::new("0 0 1/5 * * * *".parse().unwrap(), job)); + + info!("Scheduler activated..."); + loop { + scheduler.tick(); + let duration = scheduler.time_till_next_job(); + info!("Sleep {:?} for next job ...", duration); + std::thread::sleep(duration); + } + } +} + +#[cfg(test)] +mod tests { + use tabby_common::{ + config::{Config, Repository}, + path::set_tabby_root, + }; + use temp_testdir::*; + use tracing_test::traced_test; + + use super::*; + + #[traced_test] + #[test] + fn end_to_end() { + set_tabby_root(TempDir::default().to_path_buf()); + + let config = Config { + repositories: vec![Repository { + git_url: "https://github.com/TabbyML/interview-questions".to_owned(), + }], + }; + + repository::sync_repositories(&config); + index::index_repositories(&config); } } diff --git a/crates/tabby-scheduler/src/repository.rs b/crates/tabby-scheduler/src/repository.rs index 910467a..4c14f9c 100644 --- a/crates/tabby-scheduler/src/repository.rs +++ b/crates/tabby-scheduler/src/repository.rs @@ -1,10 +1,6 @@ -use std::{path::PathBuf, process::Command}; +use std::process::Command; -use filenamify::filenamify; -use tabby_common::{ - config::{Config, Repository}, - path::repositories_dir, -}; +use tabby_common::config::{Config, Repository}; trait ConfigExt { fn sync_repositories(&self); @@ -19,15 +15,10 @@ impl ConfigExt for Config { } trait RepositoryExt { - fn dir(&self) -> PathBuf; fn sync(&self); } impl RepositoryExt for Repository { - fn dir(&self) -> PathBuf { - repositories_dir().join(filenamify(&self.git_url)) - } - fn sync(&self) { let dir = self.dir(); let dir_string = dir.display().to_string(); @@ -62,32 +53,6 @@ impl RepositoryExt for Repository { } } -pub fn sync_repositories() { - let config = Config::load(); +pub fn sync_repositories(config: &Config) { config.sync_repositories(); } - -#[cfg(test)] -mod tests { - use tabby_common::{ - config::{Config, Repository}, - path::set_tabby_root, - }; - use temp_testdir::*; - - use super::*; - - #[test] - fn it_works() { - set_tabby_root(TempDir::default().to_path_buf()); - - let config = Config { - repositories: vec![Repository { - git_url: "https://github.com/TabbyML/interview-questions".to_owned(), - }], - }; - - config.save(); - sync_repositories(); - } -} diff --git a/crates/tabby/src/main.rs b/crates/tabby/src/main.rs index f2fd7f6..0150175 100644 --- a/crates/tabby/src/main.rs +++ b/crates/tabby/src/main.rs @@ -2,6 +2,7 @@ mod download; mod serve; use clap::{Parser, Subcommand}; +use tracing_subscriber::EnvFilter; #[derive(Parser)] #[command(author, version, about, long_about = None)] @@ -20,18 +21,27 @@ pub enum Commands { Download(download::DownloadArgs), /// Starts the scheduler process. - Scheduler, + Scheduler(SchedulerArgs), +} + +#[derive(clap::Args)] +pub struct SchedulerArgs { + /// If true, runs scheduler jobs immediately. + #[clap(long, default_value_t = false)] + now: bool, } #[tokio::main] async fn main() { - tracing_subscriber::fmt::init(); + tracing_subscriber::fmt::fmt() + .with_env_filter(EnvFilter::from_default_env().add_directive("tabby=info".parse().unwrap())) + .init(); let cli = Cli::parse(); match &cli.command { Commands::Serve(args) => serve::main(args).await, Commands::Download(args) => download::main(args).await, - Commands::Scheduler => tabby_scheduler::scheduler(), + Commands::Scheduler(args) => tabby_scheduler::scheduler(args.now), } }