diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index 3489a7f..5717761 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -32,6 +32,12 @@ jobs: toolchain: nightly components: rustfmt, clippy + - name: Install cargo-machete + uses: actions-rs/cargo@v1 + with: + command: install + args: cargo-machete + - name: Sccache cache uses: mozilla-actions/sccache-action@v0.0.3 with: @@ -48,9 +54,6 @@ jobs: ~/.cargo/registry ~/.cargo/git - - name: Cargo Machete - uses: bnjbvr/cargo-machete@main - - run: bash ./ci/prepare_build_environment.sh - run: make fix diff --git a/Cargo.lock b/Cargo.lock index 3aef1df..490c128 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1710,6 +1710,12 @@ version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "globset" version = "0.4.13" @@ -4464,6 +4470,7 @@ version = "0.6.0-dev" dependencies = [ "anyhow", "filenamify", + "glob", "lazy_static", "reqwest", "serde", @@ -4509,7 +4516,6 @@ dependencies = [ "kdam", "lazy_static", "requirements", - "serde", "serde-jsonlines", "serde_json", "serdeconv", diff --git a/Makefile b/Makefile index 9c56e95..3447eef 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ else endif fix: - cargo machete --fix + cargo machete --fix || true cargo +nightly fmt cargo +nightly clippy --fix --allow-dirty --allow-staged diff --git a/crates/tabby-common/Cargo.toml b/crates/tabby-common/Cargo.toml index c222370..9ef3e33 100644 --- a/crates/tabby-common/Cargo.toml +++ b/crates/tabby-common/Cargo.toml @@ -13,6 +13,7 @@ reqwest = { workspace = true, features = [ "json" ] } uuid = { version = "1.4.1", features = ["v4"] } tantivy.workspace = true anyhow.workspace = true +glob = "0.3.1" [features] testutils = [] diff --git a/crates/tabby-common/src/lib.rs b/crates/tabby-common/src/lib.rs index c202833..cce151d 100644 --- a/crates/tabby-common/src/lib.rs +++ b/crates/tabby-common/src/lib.rs @@ -9,6 +9,7 @@ use std::{ fs::File, io::{BufReader, Error}, ops::Range, + path::PathBuf, }; use path::dataset_dir; @@ -28,9 +29,13 @@ pub struct SourceFile { } impl SourceFile { + pub fn files_jsonl() -> PathBuf { + dataset_dir().join("files.jsonl") + } + pub fn all() -> Result, Error> { - let iter = dataset_dir().read_dir()?.flat_map(|path| { - let path = path.unwrap().path(); + let files = glob::glob(format!("{}*", Self::files_jsonl().display()).as_str()).unwrap(); + let iter = files.filter_map(|x| x.ok()).flat_map(|path| { let fp = BufReader::new(File::open(path).unwrap()); let reader = JsonLinesReader::new(fp); reader.read_all::().map(|x| x.unwrap()) diff --git a/crates/tabby-common/src/path.rs b/crates/tabby-common/src/path.rs index aaec53a..44a1cab 100644 --- a/crates/tabby-common/src/path.rs +++ b/crates/tabby-common/src/path.rs @@ -36,7 +36,7 @@ pub fn repositories_dir() -> PathBuf { } pub fn dependency_file() -> PathBuf { - repositories_dir().join("deps.json") + dataset_dir().join("deps.json") } pub fn index_dir() -> PathBuf { diff --git a/crates/tabby-scheduler/Cargo.toml b/crates/tabby-scheduler/Cargo.toml index 188ec7f..e0f49e8 100644 --- a/crates/tabby-scheduler/Cargo.toml +++ b/crates/tabby-scheduler/Cargo.toml @@ -24,7 +24,6 @@ tree-sitter-ruby= "0.20.0" ignore = "0.4.20" kdam = { version = "0.5.0" } requirements = "0.3.0" -serde.workspace = true serdeconv.workspace = true [dev-dependencies] diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs index e0ada78..4cd4ca0 100644 --- a/crates/tabby-scheduler/src/dataset.rs +++ b/crates/tabby-scheduler/src/dataset.rs @@ -87,7 +87,7 @@ pub fn create_dataset(config: &Config) -> Result<()> { fs::remove_dir_all(dataset_dir()).ok(); fs::create_dir_all(dataset_dir())?; let mut writer = FileRotate::new( - dataset_dir().join("data.jsonl"), + SourceFile::files_jsonl(), AppendCount::new(usize::max_value()), ContentLimit::Lines(1000), Compression::None, diff --git a/crates/tabby-scheduler/src/dataset/deps.rs b/crates/tabby-scheduler/src/dataset/deps.rs index a8d4eb0..521b21f 100644 --- a/crates/tabby-scheduler/src/dataset/deps.rs +++ b/crates/tabby-scheduler/src/dataset/deps.rs @@ -1,7 +1,6 @@ use std::{collections::HashSet, path::Path}; use anyhow::Result; -use serde::Deserialize; use tabby_common::{Dependency, DependencyFile}; use tracing::warn; @@ -10,14 +9,6 @@ pub fn collect(path: &Path, file: &mut DependencyFile) { file.deps.append(&mut deps); } - if let Ok(mut deps) = process_lock_file(path, "poetry.lock", "python") { - file.deps.append(&mut deps); - } - - if let Ok(mut deps) = process_lock_file(path, "Cargo.lock", "rust") { - file.deps.append(&mut deps); - } - remove_duplicates(file); } @@ -46,31 +37,6 @@ fn process_requirements_txt(path: &Path) -> Result> { Ok(deps) } -#[derive(Deserialize)] -struct LockFileDependency { - name: String, - version: String, -} - -#[derive(Deserialize)] -struct LockFile { - package: Vec, -} - -fn process_lock_file(path: &Path, filename: &str, language: &str) -> Result> { - let poetry_lock = path.join(filename); - let deps: LockFile = serdeconv::from_toml_file(poetry_lock)?; - Ok(deps - .package - .into_iter() - .map(|x| Dependency { - language: language.to_string(), - name: x.name, - version: Some(x.version), - }) - .collect()) -} - fn remove_duplicates(file: &mut DependencyFile) { let mut keys: HashSet<(String, String)> = HashSet::default(); let mut deps = vec![];