feat: support local directory for Repository.git_url (#622)

r0.4
Meng Zhang 2023-10-23 18:29:38 -07:00 committed by GitHub
parent 989dda9f1b
commit 422b27d2ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 83 additions and 23 deletions

View File

@ -3,6 +3,7 @@
## Features
* Supports golang: https://github.com/TabbyML/tabby/issues/553
* Supports ruby: https://github.com/TabbyML/tabby/pull/597
* Supports using local directory for `Repository.git_url`: use `file:///path/to/repo` to specify a local directory.
## Fixes and Improvements
* Improve snippet retrieval by deduplicating candidates against existing content and snippets: https://github.com/TabbyML/tabby/pull/582

52
Cargo.lock generated
View File

@ -50,9 +50,9 @@ dependencies = [
[[package]]
name = "aho-corasick"
version = "1.0.1"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
@ -345,6 +345,16 @@ dependencies = [
"generic-array",
]
[[package]]
name = "bstr"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c79ad7fb2dd38f3dabd76b09c6a5a20c038fc0213ef1e9afd30eb777f120f019"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.13.0"
@ -1213,6 +1223,19 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "globset"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d"
dependencies = [
"aho-corasick 1.1.2",
"bstr",
"fnv",
"log",
"regex",
]
[[package]]
name = "h2"
version = "0.3.19"
@ -1425,6 +1448,23 @@ dependencies = [
"unicode-normalization",
]
[[package]]
name = "ignore"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492"
dependencies = [
"globset",
"lazy_static",
"log",
"memchr",
"regex",
"same-file",
"thread_local",
"walkdir",
"winapi-util",
]
[[package]]
name = "indexmap"
version = "1.9.3"
@ -2518,7 +2558,7 @@ version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87"
dependencies = [
"aho-corasick 1.0.1",
"aho-corasick 1.1.2",
"memchr",
"regex-automata 0.4.1",
"regex-syntax 0.8.1",
@ -2539,7 +2579,7 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b"
dependencies = [
"aho-corasick 1.0.1",
"aho-corasick 1.1.2",
"memchr",
"regex-syntax 0.8.1",
]
@ -3208,6 +3248,7 @@ dependencies = [
"anyhow",
"file-rotate",
"filenamify",
"ignore",
"job_scheduler",
"lazy_static",
"serde",
@ -3225,7 +3266,6 @@ dependencies = [
"tree-sitter-rust",
"tree-sitter-tags",
"tree-sitter-typescript",
"walkdir",
]
[[package]]
@ -3234,7 +3274,7 @@ version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae"
dependencies = [
"aho-corasick 1.0.1",
"aho-corasick 1.1.2",
"arc-swap",
"async-trait",
"base64 0.21.2",

View File

@ -49,17 +49,40 @@ pub struct Repository {
impl Repository {
    /// Returns the directory that holds this repository's files.
    ///
    /// * For a local repository (see [`Repository::is_local_dir`]) the path is
    ///   taken from the URL itself: `file:///path/to/repo` yields
    ///   `/path/to/repo`.
    /// * For any other URL the result is `repositories_dir()` joined with the
    ///   `filenamify`-ed `git_url`.
    pub fn dir(&self) -> PathBuf {
        match self.git_url.strip_prefix("file://") {
            // Normalise to exactly one leading `/`. The canonical three-slash
            // form (`file:///path`) must not produce `//path`, and the
            // two-slash form (`file://home/user`) keeps resolving to
            // `/home/user` as it did before.
            Some(rest) => PathBuf::from(format!("/{}", rest.trim_start_matches('/'))),
            None => repositories_dir().join(filenamify(&self.git_url)),
        }
    }

    /// Returns `true` when `git_url` starts with the `file://` scheme, i.e. the
    /// repository is a directory on the local filesystem.
    pub fn is_local_dir(&self) -> bool {
        self.git_url.starts_with("file://")
    }
}

#[cfg(test)]
mod tests {
    use super::{Config, Repository};

    #[test]
    fn it_parses_empty_config() {
        let config = serdeconv::from_toml_str::<Config>("");
        // `assert!` (not `debug_assert!`) so the check also runs when the
        // tests are built without debug assertions.
        assert!(config.is_ok(), "{}", config.err().unwrap());
    }

    #[test]
    fn it_parses_local_dir() {
        // Canonical form: empty authority followed by an absolute path.
        let repo = Repository {
            git_url: "file:///home/user".to_owned(),
        };
        assert!(repo.is_local_dir());
        assert_eq!(repo.dir().display().to_string(), "/home/user");

        // Two-slash form is still mapped to the same absolute path.
        let repo = Repository {
            git_url: "file://home/user".to_owned(),
        };
        assert!(repo.is_local_dir());
        assert_eq!(repo.dir().display().to_string(), "/home/user");

        // Remote URLs are not local directories.
        let repo = Repository {
            git_url: "https://github.com/TabbyML/tabby".to_owned(),
        };
        assert!(!repo.is_local_dir());
    }
}

View File

@ -13,7 +13,6 @@ tabby-common = { path = "../tabby-common" }
tantivy = { workspace = true }
tracing = { workspace = true }
tree-sitter-tags = "0.20.2"
walkdir = "2.3.3"
lazy_static = { workspace = true }
serde = { workspace = true }
serde-jsonlines = { workspace = true }
@ -23,6 +22,7 @@ tree-sitter-rust = "0.20.3"
tree-sitter-typescript = "0.20.3"
tree-sitter-go = "0.20.0"
tree-sitter-ruby= "0.20.0"
ignore = "0.4.20"
[dev-dependencies]
temp_testdir = "0.2"

View File

@ -7,6 +7,7 @@ use std::{
use anyhow::Result;
use file_rotate::{compression::Compression, suffix::AppendCount, ContentLimit, FileRotate};
use ignore::{DirEntry, Walk};
use lazy_static::lazy_static;
use serde_jsonlines::WriteExt;
use tabby_common::{
@ -16,7 +17,6 @@ use tabby_common::{
};
use tracing::{error, info};
use tree_sitter_tags::{TagsConfiguration, TagsContext};
use walkdir::{DirEntry, WalkDir};
trait RepositoryExt {
fn create_dataset(&self, writer: &mut impl Write) -> Result<()>;
@ -27,9 +27,7 @@ impl RepositoryExt for Repository {
let dir = self.dir();
info!("Start indexing repository {}", self.git_url);
let walk_dir = WalkDir::new(dir.as_path())
.into_iter()
.filter_entry(is_not_hidden)
let walk_dir = Walk::new(dir.as_path())
.filter_map(Result::ok)
.filter(is_source_code);
@ -67,21 +65,13 @@ fn get_language(ext: &OsStr) -> Option<&str> {
}
fn is_source_code(entry: &DirEntry) -> bool {
if entry.file_type().is_file() {
if entry.file_type().is_some_and(|x| x.is_file()) {
entry.path().extension().and_then(get_language).is_some()
} else {
false
}
}
/// Returns `true` for entries that should be traversed: the walk root itself
/// (depth 0) and any entry whose file name does not start with `.`.
///
/// Entries whose file name is not valid UTF-8 are rejected.
fn is_not_hidden(entry: &DirEntry) -> bool {
    match entry.file_name().to_str() {
        // The root is always kept, even if its own name starts with a dot.
        Some(name) => entry.depth() == 0 || !name.starts_with('.'),
        None => false,
    }
}
pub fn create_dataset(config: &Config) -> Result<()> {
fs::remove_dir_all(dataset_dir()).ok();
fs::create_dir_all(dataset_dir())?;

View File

@ -10,7 +10,13 @@ trait ConfigExt {
impl ConfigExt for Config {
fn sync_repositories(&self) -> Result<()> {
for repository in self.repositories.iter() {
repository.sync()?;
if repository.is_local_dir() {
if !repository.dir().exists() {
panic!("Directory {} does not exist", repository.dir().display());
}
} else {
repository.sync()?;
}
}
Ok(())