feat: support local directory for Repository.git_url (#622)
parent
989dda9f1b
commit
422b27d2ed
|
|
@ -3,6 +3,7 @@
|
|||
## Features
|
||||
* Supports golang: https://github.com/TabbyML/tabby/issues/553
|
||||
* Supports ruby: https://github.com/TabbyML/tabby/pull/597
|
||||
* Supports using local directory for `Repository.git_url`: use `file:///path/to/repo` to specify a local directory.
|
||||
|
||||
## Fixes and Improvements
|
||||
* Improve snippet retrieval by deduplicating candidates against existing content and snippets: https://github.com/TabbyML/tabby/pull/582
|
||||
|
|
|
|||
|
|
@ -50,9 +50,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.0.1"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"
|
||||
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
|
@ -345,6 +345,16 @@ dependencies = [
|
|||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c79ad7fb2dd38f3dabd76b09c6a5a20c038fc0213ef1e9afd30eb777f120f019"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.13.0"
|
||||
|
|
@ -1213,6 +1223,19 @@ version = "0.3.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d"
|
||||
dependencies = [
|
||||
"aho-corasick 1.1.2",
|
||||
"bstr",
|
||||
"fnv",
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.3.19"
|
||||
|
|
@ -1425,6 +1448,23 @@ dependencies = [
|
|||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492"
|
||||
dependencies = [
|
||||
"globset",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"memchr",
|
||||
"regex",
|
||||
"same-file",
|
||||
"thread_local",
|
||||
"walkdir",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.9.3"
|
||||
|
|
@ -2518,7 +2558,7 @@ version = "1.10.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87"
|
||||
dependencies = [
|
||||
"aho-corasick 1.0.1",
|
||||
"aho-corasick 1.1.2",
|
||||
"memchr",
|
||||
"regex-automata 0.4.1",
|
||||
"regex-syntax 0.8.1",
|
||||
|
|
@ -2539,7 +2579,7 @@ version = "0.4.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b"
|
||||
dependencies = [
|
||||
"aho-corasick 1.0.1",
|
||||
"aho-corasick 1.1.2",
|
||||
"memchr",
|
||||
"regex-syntax 0.8.1",
|
||||
]
|
||||
|
|
@ -3208,6 +3248,7 @@ dependencies = [
|
|||
"anyhow",
|
||||
"file-rotate",
|
||||
"filenamify",
|
||||
"ignore",
|
||||
"job_scheduler",
|
||||
"lazy_static",
|
||||
"serde",
|
||||
|
|
@ -3225,7 +3266,6 @@ dependencies = [
|
|||
"tree-sitter-rust",
|
||||
"tree-sitter-tags",
|
||||
"tree-sitter-typescript",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -3234,7 +3274,7 @@ version = "0.21.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae"
|
||||
dependencies = [
|
||||
"aho-corasick 1.0.1",
|
||||
"aho-corasick 1.1.2",
|
||||
"arc-swap",
|
||||
"async-trait",
|
||||
"base64 0.21.2",
|
||||
|
|
|
|||
|
|
@ -49,17 +49,40 @@ pub struct Repository {
|
|||
|
||||
impl Repository {
|
||||
pub fn dir(&self) -> PathBuf {
|
||||
repositories_dir().join(filenamify(&self.git_url))
|
||||
if self.is_local_dir() {
|
||||
let path = self.git_url.strip_prefix("file:/").unwrap();
|
||||
path.into()
|
||||
} else {
|
||||
repositories_dir().join(filenamify(&self.git_url))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_local_dir(&self) -> bool {
|
||||
self.git_url.starts_with("file://")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{Config, Repository};

    /// An empty TOML document is expected to deserialize into a `Config`.
    #[test]
    fn it_parses_empty_config() {
        let parsed = serdeconv::from_toml_str::<Config>("");
        debug_assert!(parsed.is_ok(), "{}", parsed.err().unwrap());
    }

    /// A `file://` URL is reported as a local directory and maps to its path;
    /// an `https://` URL is not a local directory.
    #[test]
    fn it_parses_local_dir() {
        let local = Repository {
            git_url: String::from("file://home/user"),
        };
        assert!(local.is_local_dir());
        let shown = local.dir().display().to_string();
        assert_eq!(shown, "/home/user");

        let remote = Repository {
            git_url: String::from("https://github.com/TabbyML/tabby"),
        };
        assert!(!remote.is_local_dir());
    }
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ tabby-common = { path = "../tabby-common" }
|
|||
tantivy = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
tree-sitter-tags = "0.20.2"
|
||||
walkdir = "2.3.3"
|
||||
lazy_static = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde-jsonlines = { workspace = true }
|
||||
|
|
@ -23,6 +22,7 @@ tree-sitter-rust = "0.20.3"
|
|||
tree-sitter-typescript = "0.20.3"
|
||||
tree-sitter-go = "0.20.0"
|
||||
tree-sitter-ruby= "0.20.0"
|
||||
ignore = "0.4.20"
|
||||
|
||||
[dev-dependencies]
|
||||
temp_testdir = "0.2"
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use std::{
|
|||
|
||||
use anyhow::Result;
|
||||
use file_rotate::{compression::Compression, suffix::AppendCount, ContentLimit, FileRotate};
|
||||
use ignore::{DirEntry, Walk};
|
||||
use lazy_static::lazy_static;
|
||||
use serde_jsonlines::WriteExt;
|
||||
use tabby_common::{
|
||||
|
|
@ -16,7 +17,6 @@ use tabby_common::{
|
|||
};
|
||||
use tracing::{error, info};
|
||||
use tree_sitter_tags::{TagsConfiguration, TagsContext};
|
||||
use walkdir::{DirEntry, WalkDir};
|
||||
|
||||
trait RepositoryExt {
|
||||
fn create_dataset(&self, writer: &mut impl Write) -> Result<()>;
|
||||
|
|
@ -27,9 +27,7 @@ impl RepositoryExt for Repository {
|
|||
let dir = self.dir();
|
||||
|
||||
info!("Start indexing repository {}", self.git_url);
|
||||
let walk_dir = WalkDir::new(dir.as_path())
|
||||
.into_iter()
|
||||
.filter_entry(is_not_hidden)
|
||||
let walk_dir = Walk::new(dir.as_path())
|
||||
.filter_map(Result::ok)
|
||||
.filter(is_source_code);
|
||||
|
||||
|
|
@ -67,21 +65,13 @@ fn get_language(ext: &OsStr) -> Option<&str> {
|
|||
}
|
||||
|
||||
fn is_source_code(entry: &DirEntry) -> bool {
|
||||
if entry.file_type().is_file() {
|
||||
if entry.file_type().is_some_and(|x| x.is_file()) {
|
||||
entry.path().extension().and_then(get_language).is_some()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn is_not_hidden(entry: &DirEntry) -> bool {
|
||||
entry
|
||||
.file_name()
|
||||
.to_str()
|
||||
.map(|s| entry.depth() == 0 || !s.starts_with('.'))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn create_dataset(config: &Config) -> Result<()> {
|
||||
fs::remove_dir_all(dataset_dir()).ok();
|
||||
fs::create_dir_all(dataset_dir())?;
|
||||
|
|
|
|||
|
|
@ -10,7 +10,13 @@ trait ConfigExt {
|
|||
impl ConfigExt for Config {
|
||||
fn sync_repositories(&self) -> Result<()> {
|
||||
for repository in self.repositories.iter() {
|
||||
repository.sync()?;
|
||||
if repository.is_local_dir() {
|
||||
if !repository.dir().exists() {
|
||||
panic!("Directory {} does not exist", repository.dir().display());
|
||||
}
|
||||
} else {
|
||||
repository.sync()?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
Loading…
Reference in New Issue