Review notes (text before the first "diff --git" header is ignored by `git apply` and `patch`):
  * Repository::dir() now strips the same "file://" prefix that is_local_dir() tests, so the
    documented form file:///path/to/repo maps to /path/to/repo; the unit test uses that form.
  * sync_repositories() returns an error instead of panicking when a local directory is missing.
  * The post-image blob hashes on the "index" lines of config.rs and repository.rs predate
    these edits and are informational only.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 95892d9..4b121bd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## Features
 * Supports golang: https://github.com/TabbyML/tabby/issues/553
 * Supports ruby: https://github.com/TabbyML/tabby/pull/597
+* Supports using local directory for `Repository.git_url`: use `file:///path/to/repo` to specify a local directory.
 
 ## Fixes and Improvements
 * Improve snippets retrieval by dedup candidates to existing content + snippets: https://github.com/TabbyML/tabby/pull/582
diff --git a/Cargo.lock b/Cargo.lock
index baac196..2081cf7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -50,9 +50,9 @@ dependencies = [
 
 [[package]]
 name = "aho-corasick"
-version = "1.0.1"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"
+checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
 dependencies = [
  "memchr",
 ]
@@ -345,6 +345,16 @@ dependencies = [
  "generic-array",
 ]
 
+[[package]]
+name = "bstr"
+version = "1.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c79ad7fb2dd38f3dabd76b09c6a5a20c038fc0213ef1e9afd30eb777f120f019"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.13.0"
@@ -1213,6 +1223,19 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
 
+[[package]]
+name = "globset"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d"
+dependencies = [
+ "aho-corasick 1.1.2",
+ "bstr",
+ "fnv",
+ "log",
+ "regex",
+]
+
 [[package]]
 name = "h2"
 version = "0.3.19"
@@ -1425,6 +1448,23 @@ dependencies = [
  "unicode-normalization",
 ]
 
+[[package]]
+name = "ignore"
+version = "0.4.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492"
+dependencies = [
+ "globset",
+ "lazy_static",
+ "log",
+ "memchr",
+ "regex",
+ "same-file",
+ "thread_local",
+ "walkdir",
+ "winapi-util",
+]
+
 [[package]]
 name = "indexmap"
 version = "1.9.3"
@@ -2518,7 +2558,7 @@ version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87"
 dependencies = [
- "aho-corasick 1.0.1",
+ "aho-corasick 1.1.2",
  "memchr",
  "regex-automata 0.4.1",
  "regex-syntax 0.8.1",
@@ -2539,7 +2579,7 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b"
 dependencies = [
- "aho-corasick 1.0.1",
+ "aho-corasick 1.1.2",
  "memchr",
  "regex-syntax 0.8.1",
 ]
@@ -3208,6 +3248,7 @@ dependencies = [
  "anyhow",
  "file-rotate",
  "filenamify",
+ "ignore",
  "job_scheduler",
  "lazy_static",
  "serde",
@@ -3225,7 +3266,6 @@ dependencies = [
  "tree-sitter-rust",
  "tree-sitter-tags",
  "tree-sitter-typescript",
- "walkdir",
 ]
 
 [[package]]
@@ -3234,7 +3274,7 @@ version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae"
 dependencies = [
- "aho-corasick 1.0.1",
+ "aho-corasick 1.1.2",
  "arc-swap",
  "async-trait",
  "base64 0.21.2",
diff --git a/crates/tabby-common/src/config.rs b/crates/tabby-common/src/config.rs
index 7e77db7..69f7b66 100644
--- a/crates/tabby-common/src/config.rs
+++ b/crates/tabby-common/src/config.rs
@@ -49,17 +49,42 @@ pub struct Repository {
 
 impl Repository {
+    /// Returns the directory holding this repository's files: the path that follows the
+    /// `file://` prefix of `git_url`, or `repositories_dir()/<filenamified git_url>` otherwise.
     pub fn dir(&self) -> PathBuf {
-        repositories_dir().join(filenamify(&self.git_url))
+        if let Some(path) = self.git_url.strip_prefix("file://") {
+            path.into()
+        } else {
+            repositories_dir().join(filenamify(&self.git_url))
+        }
+    }
+
+    /// Returns `true` when `git_url` starts with `file://`, i.e. names a local directory.
+    pub fn is_local_dir(&self) -> bool {
+        self.git_url.starts_with("file://")
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::Config;
+    use super::{Config, Repository};
 
     #[test]
     fn it_parses_empty_config() {
         let config = serdeconv::from_toml_str::<Config>("");
         debug_assert!(config.is_ok(), "{}", config.err().unwrap());
     }
+
+    #[test]
+    fn it_parses_local_dir() {
+        let repo = Repository {
+            git_url: "file:///home/user".to_owned(),
+        };
+        assert!(repo.is_local_dir());
+        assert_eq!(repo.dir().display().to_string(), "/home/user");
+
+        let repo = Repository {
+            git_url: "https://github.com/TabbyML/tabby".to_owned(),
+        };
+        assert!(!repo.is_local_dir());
+    }
 }
diff --git a/crates/tabby-scheduler/Cargo.toml b/crates/tabby-scheduler/Cargo.toml
index 2f50b3c..1289cc8 100644
--- a/crates/tabby-scheduler/Cargo.toml
+++ b/crates/tabby-scheduler/Cargo.toml
@@ -13,7 +13,6 @@ tabby-common = { path = "../tabby-common" }
 tantivy = { workspace = true }
 tracing = { workspace = true }
 tree-sitter-tags = "0.20.2"
-walkdir = "2.3.3"
 lazy_static = { workspace = true }
 serde = { workspace = true }
 serde-jsonlines = { workspace = true }
@@ -23,6 +22,7 @@ tree-sitter-rust = "0.20.3"
 tree-sitter-typescript = "0.20.3"
 tree-sitter-go = "0.20.0"
 tree-sitter-ruby= "0.20.0"
+ignore = "0.4.20"
 
 [dev-dependencies]
 temp_testdir = "0.2"
diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs
index 7be9faf..a224a6d 100644
--- a/crates/tabby-scheduler/src/dataset.rs
+++ b/crates/tabby-scheduler/src/dataset.rs
@@ -7,6 +7,7 @@ use std::{
 
 use anyhow::Result;
 use file_rotate::{compression::Compression, suffix::AppendCount, ContentLimit, FileRotate};
+use ignore::{DirEntry, Walk};
 use lazy_static::lazy_static;
 use serde_jsonlines::WriteExt;
 use tabby_common::{
@@ -16,7 +17,6 @@ use tabby_common::{
 };
 use tracing::{error, info};
 use tree_sitter_tags::{TagsConfiguration, TagsContext};
-use walkdir::{DirEntry, WalkDir};
 
 trait RepositoryExt {
     fn create_dataset(&self, writer: &mut impl Write) -> Result<()>;
@@ -27,9 +27,7 @@ impl RepositoryExt for Repository {
         let dir = self.dir();
 
         info!("Start indexing repository {}", self.git_url);
-        let walk_dir = WalkDir::new(dir.as_path())
-            .into_iter()
-            .filter_entry(is_not_hidden)
+        let walk_dir = Walk::new(dir.as_path())
             .filter_map(Result::ok)
             .filter(is_source_code);
 
@@ -67,21 +65,13 @@ fn get_language(ext: &OsStr) -> Option<&str> {
 }
 
 fn is_source_code(entry: &DirEntry) -> bool {
-    if entry.file_type().is_file() {
+    if entry.file_type().is_some_and(|x| x.is_file()) {
         entry.path().extension().and_then(get_language).is_some()
     } else {
         false
     }
 }
 
-fn is_not_hidden(entry: &DirEntry) -> bool {
-    entry
-        .file_name()
-        .to_str()
-        .map(|s| entry.depth() == 0 || !s.starts_with('.'))
-        .unwrap_or(false)
-}
-
 pub fn create_dataset(config: &Config) -> Result<()> {
     fs::remove_dir_all(dataset_dir()).ok();
     fs::create_dir_all(dataset_dir())?;
diff --git a/crates/tabby-scheduler/src/repository.rs b/crates/tabby-scheduler/src/repository.rs
index 8fb9809..6cf0fd5 100644
--- a/crates/tabby-scheduler/src/repository.rs
+++ b/crates/tabby-scheduler/src/repository.rs
@@ -10,7 +10,14 @@ trait ConfigExt {
 impl ConfigExt for Config {
     fn sync_repositories(&self) -> Result<()> {
         for repository in self.repositories.iter() {
-            repository.sync()?;
+            if repository.is_local_dir() {
+                let dir = repository.dir();
+                if !dir.exists() {
+                    anyhow::bail!("Directory {} does not exist", dir.display());
+                }
+            } else {
+                repository.sync()?;
+            }
         }
 
         Ok(())