feat: support local directory for Repository.git_url (#622)
parent
989dda9f1b
commit
422b27d2ed
|
|
@ -3,6 +3,7 @@
|
||||||
## Features
|
## Features
|
||||||
* Supports golang: https://github.com/TabbyML/tabby/issues/553
|
* Supports golang: https://github.com/TabbyML/tabby/issues/553
|
||||||
* Supports ruby: https://github.com/TabbyML/tabby/pull/597
|
* Supports ruby: https://github.com/TabbyML/tabby/pull/597
|
||||||
|
* Supports using a local directory for `Repository.git_url`: specify it as `file:///path/to/repo`.
|
||||||
|
|
||||||
## Fixes and Improvements
|
## Fixes and Improvements
|
||||||
* Improve snippets retrieval by deduplicating candidates against existing content and snippets: https://github.com/TabbyML/tabby/pull/582
|
* Improve snippets retrieval by deduplicating candidates against existing content and snippets: https://github.com/TabbyML/tabby/pull/582
|
||||||
|
|
|
||||||
|
|
@ -50,9 +50,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aho-corasick"
|
name = "aho-corasick"
|
||||||
version = "1.0.1"
|
version = "1.1.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"
|
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
|
@ -345,6 +345,16 @@ dependencies = [
|
||||||
"generic-array",
|
"generic-array",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bstr"
|
||||||
|
version = "1.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c79ad7fb2dd38f3dabd76b09c6a5a20c038fc0213ef1e9afd30eb777f120f019"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bumpalo"
|
name = "bumpalo"
|
||||||
version = "3.13.0"
|
version = "3.13.0"
|
||||||
|
|
@ -1213,6 +1223,19 @@ version = "0.3.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "globset"
|
||||||
|
version = "0.4.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick 1.1.2",
|
||||||
|
"bstr",
|
||||||
|
"fnv",
|
||||||
|
"log",
|
||||||
|
"regex",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "h2"
|
name = "h2"
|
||||||
version = "0.3.19"
|
version = "0.3.19"
|
||||||
|
|
@ -1425,6 +1448,23 @@ dependencies = [
|
||||||
"unicode-normalization",
|
"unicode-normalization",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ignore"
|
||||||
|
version = "0.4.20"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492"
|
||||||
|
dependencies = [
|
||||||
|
"globset",
|
||||||
|
"lazy_static",
|
||||||
|
"log",
|
||||||
|
"memchr",
|
||||||
|
"regex",
|
||||||
|
"same-file",
|
||||||
|
"thread_local",
|
||||||
|
"walkdir",
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexmap"
|
name = "indexmap"
|
||||||
version = "1.9.3"
|
version = "1.9.3"
|
||||||
|
|
@ -2518,7 +2558,7 @@ version = "1.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87"
|
checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick 1.0.1",
|
"aho-corasick 1.1.2",
|
||||||
"memchr",
|
"memchr",
|
||||||
"regex-automata 0.4.1",
|
"regex-automata 0.4.1",
|
||||||
"regex-syntax 0.8.1",
|
"regex-syntax 0.8.1",
|
||||||
|
|
@ -2539,7 +2579,7 @@ version = "0.4.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b"
|
checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick 1.0.1",
|
"aho-corasick 1.1.2",
|
||||||
"memchr",
|
"memchr",
|
||||||
"regex-syntax 0.8.1",
|
"regex-syntax 0.8.1",
|
||||||
]
|
]
|
||||||
|
|
@ -3208,6 +3248,7 @@ dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"file-rotate",
|
"file-rotate",
|
||||||
"filenamify",
|
"filenamify",
|
||||||
|
"ignore",
|
||||||
"job_scheduler",
|
"job_scheduler",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"serde",
|
"serde",
|
||||||
|
|
@ -3225,7 +3266,6 @@ dependencies = [
|
||||||
"tree-sitter-rust",
|
"tree-sitter-rust",
|
||||||
"tree-sitter-tags",
|
"tree-sitter-tags",
|
||||||
"tree-sitter-typescript",
|
"tree-sitter-typescript",
|
||||||
"walkdir",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -3234,7 +3274,7 @@ version = "0.21.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae"
|
checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick 1.0.1",
|
"aho-corasick 1.1.2",
|
||||||
"arc-swap",
|
"arc-swap",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"base64 0.21.2",
|
"base64 0.21.2",
|
||||||
|
|
|
||||||
|
|
@ -49,17 +49,40 @@ pub struct Repository {
|
||||||
|
|
||||||
impl Repository {
|
impl Repository {
|
||||||
pub fn dir(&self) -> PathBuf {
|
pub fn dir(&self) -> PathBuf {
|
||||||
|
if self.is_local_dir() {
|
||||||
|
let path = self.git_url.strip_prefix("file:/").unwrap();
|
||||||
|
path.into()
|
||||||
|
} else {
|
||||||
repositories_dir().join(filenamify(&self.git_url))
|
repositories_dir().join(filenamify(&self.git_url))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_local_dir(&self) -> bool {
|
||||||
|
self.git_url.starts_with("file://")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::{Config, Repository};

    /// An empty TOML document must still deserialize into a `Config`.
    #[test]
    fn it_parses_empty_config() {
        let config = serdeconv::from_toml_str::<Config>("");
        // `assert!` rather than `debug_assert!`: the latter is compiled out when
        // debug assertions are disabled, which would make this test vacuous.
        assert!(config.is_ok(), "{}", config.err().unwrap());
    }

    /// `file://` URLs are local directories; other URLs are not.
    #[test]
    fn it_parses_local_dir() {
        let repo = Repository {
            git_url: "file://home/user".to_owned(),
        };
        assert!(repo.is_local_dir());
        assert_eq!(repo.dir().display().to_string(), "/home/user");

        let repo = Repository {
            git_url: "https://github.com/TabbyML/tabby".to_owned(),
        };
        assert!(!repo.is_local_dir());
    }
}
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,6 @@ tabby-common = { path = "../tabby-common" }
|
||||||
tantivy = { workspace = true }
|
tantivy = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
tree-sitter-tags = "0.20.2"
|
tree-sitter-tags = "0.20.2"
|
||||||
walkdir = "2.3.3"
|
|
||||||
lazy_static = { workspace = true }
|
lazy_static = { workspace = true }
|
||||||
serde = { workspace = true }
|
serde = { workspace = true }
|
||||||
serde-jsonlines = { workspace = true }
|
serde-jsonlines = { workspace = true }
|
||||||
|
|
@ -23,6 +22,7 @@ tree-sitter-rust = "0.20.3"
|
||||||
tree-sitter-typescript = "0.20.3"
|
tree-sitter-typescript = "0.20.3"
|
||||||
tree-sitter-go = "0.20.0"
|
tree-sitter-go = "0.20.0"
|
||||||
tree-sitter-ruby= "0.20.0"
|
tree-sitter-ruby= "0.20.0"
|
||||||
|
ignore = "0.4.20"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
temp_testdir = "0.2"
|
temp_testdir = "0.2"
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ use std::{
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use file_rotate::{compression::Compression, suffix::AppendCount, ContentLimit, FileRotate};
|
use file_rotate::{compression::Compression, suffix::AppendCount, ContentLimit, FileRotate};
|
||||||
|
use ignore::{DirEntry, Walk};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use serde_jsonlines::WriteExt;
|
use serde_jsonlines::WriteExt;
|
||||||
use tabby_common::{
|
use tabby_common::{
|
||||||
|
|
@ -16,7 +17,6 @@ use tabby_common::{
|
||||||
};
|
};
|
||||||
use tracing::{error, info};
|
use tracing::{error, info};
|
||||||
use tree_sitter_tags::{TagsConfiguration, TagsContext};
|
use tree_sitter_tags::{TagsConfiguration, TagsContext};
|
||||||
use walkdir::{DirEntry, WalkDir};
|
|
||||||
|
|
||||||
trait RepositoryExt {
|
trait RepositoryExt {
|
||||||
fn create_dataset(&self, writer: &mut impl Write) -> Result<()>;
|
fn create_dataset(&self, writer: &mut impl Write) -> Result<()>;
|
||||||
|
|
@ -27,9 +27,7 @@ impl RepositoryExt for Repository {
|
||||||
let dir = self.dir();
|
let dir = self.dir();
|
||||||
|
|
||||||
info!("Start indexing repository {}", self.git_url);
|
info!("Start indexing repository {}", self.git_url);
|
||||||
let walk_dir = WalkDir::new(dir.as_path())
|
let walk_dir = Walk::new(dir.as_path())
|
||||||
.into_iter()
|
|
||||||
.filter_entry(is_not_hidden)
|
|
||||||
.filter_map(Result::ok)
|
.filter_map(Result::ok)
|
||||||
.filter(is_source_code);
|
.filter(is_source_code);
|
||||||
|
|
||||||
|
|
@ -67,21 +65,13 @@ fn get_language(ext: &OsStr) -> Option<&str> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_source_code(entry: &DirEntry) -> bool {
|
fn is_source_code(entry: &DirEntry) -> bool {
|
||||||
if entry.file_type().is_file() {
|
if entry.file_type().is_some_and(|x| x.is_file()) {
|
||||||
entry.path().extension().and_then(get_language).is_some()
|
entry.path().extension().and_then(get_language).is_some()
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_not_hidden(entry: &DirEntry) -> bool {
|
|
||||||
entry
|
|
||||||
.file_name()
|
|
||||||
.to_str()
|
|
||||||
.map(|s| entry.depth() == 0 || !s.starts_with('.'))
|
|
||||||
.unwrap_or(false)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn create_dataset(config: &Config) -> Result<()> {
|
pub fn create_dataset(config: &Config) -> Result<()> {
|
||||||
fs::remove_dir_all(dataset_dir()).ok();
|
fs::remove_dir_all(dataset_dir()).ok();
|
||||||
fs::create_dir_all(dataset_dir())?;
|
fs::create_dir_all(dataset_dir())?;
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,14 @@ trait ConfigExt {
|
||||||
impl ConfigExt for Config {
|
impl ConfigExt for Config {
|
||||||
/// Walks every configured repository and makes sure it is usable.
///
/// Repositories that are not local directories have `sync()` called on them,
/// and the first error is returned to the caller. Local directories are not
/// synced; they only have to exist.
///
/// # Panics
///
/// Panics if a local repository's directory does not exist.
fn sync_repositories(&self) -> Result<()> {
    for repo in self.repositories.iter() {
        if !repo.is_local_dir() {
            repo.sync()?;
            continue;
        }

        // Resolve the path once and reuse it for the check and the message.
        let local_path = repo.dir();
        if !local_path.exists() {
            panic!("Directory {} does not exist", local_path.display());
        }
    }

    Ok(())
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue