fix: keep only direct dependency, clean up path (#827)
parent
1fe0922c72
commit
530e142d63
|
|
@ -32,6 +32,12 @@ jobs:
|
||||||
toolchain: nightly
|
toolchain: nightly
|
||||||
components: rustfmt, clippy
|
components: rustfmt, clippy
|
||||||
|
|
||||||
|
- name: Install cargo-machete
|
||||||
|
uses: actions-rs/cargo@v1
|
||||||
|
with:
|
||||||
|
command: install
|
||||||
|
args: cargo-machete
|
||||||
|
|
||||||
- name: Sccache cache
|
- name: Sccache cache
|
||||||
uses: mozilla-actions/sccache-action@v0.0.3
|
uses: mozilla-actions/sccache-action@v0.0.3
|
||||||
with:
|
with:
|
||||||
|
|
@ -48,9 +54,6 @@ jobs:
|
||||||
~/.cargo/registry
|
~/.cargo/registry
|
||||||
~/.cargo/git
|
~/.cargo/git
|
||||||
|
|
||||||
- name: Cargo Machete
|
|
||||||
uses: bnjbvr/cargo-machete@main
|
|
||||||
|
|
||||||
- run: bash ./ci/prepare_build_environment.sh
|
- run: bash ./ci/prepare_build_environment.sh
|
||||||
|
|
||||||
- run: make fix
|
- run: make fix
|
||||||
|
|
|
||||||
|
|
@ -1710,6 +1710,12 @@ version = "0.27.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
|
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "globset"
|
name = "globset"
|
||||||
version = "0.4.13"
|
version = "0.4.13"
|
||||||
|
|
@ -4464,6 +4470,7 @@ version = "0.6.0-dev"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"filenamify",
|
"filenamify",
|
||||||
|
"glob",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
|
|
@ -4509,7 +4516,6 @@ dependencies = [
|
||||||
"kdam",
|
"kdam",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"requirements",
|
"requirements",
|
||||||
"serde",
|
|
||||||
"serde-jsonlines",
|
"serde-jsonlines",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"serdeconv",
|
"serdeconv",
|
||||||
|
|
|
||||||
2
Makefile
2
Makefile
|
|
@ -6,7 +6,7 @@ else
|
||||||
endif
|
endif
|
||||||
|
|
||||||
fix:
|
fix:
|
||||||
cargo machete --fix
|
cargo machete --fix || true
|
||||||
cargo +nightly fmt
|
cargo +nightly fmt
|
||||||
cargo +nightly clippy --fix --allow-dirty --allow-staged
|
cargo +nightly clippy --fix --allow-dirty --allow-staged
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ reqwest = { workspace = true, features = [ "json" ] }
|
||||||
uuid = { version = "1.4.1", features = ["v4"] }
|
uuid = { version = "1.4.1", features = ["v4"] }
|
||||||
tantivy.workspace = true
|
tantivy.workspace = true
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
|
glob = "0.3.1"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
testutils = []
|
testutils = []
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ use std::{
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{BufReader, Error},
|
io::{BufReader, Error},
|
||||||
ops::Range,
|
ops::Range,
|
||||||
|
path::PathBuf,
|
||||||
};
|
};
|
||||||
|
|
||||||
use path::dataset_dir;
|
use path::dataset_dir;
|
||||||
|
|
@ -28,9 +29,13 @@ pub struct SourceFile {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SourceFile {
|
impl SourceFile {
|
||||||
|
pub fn files_jsonl() -> PathBuf {
|
||||||
|
dataset_dir().join("files.jsonl")
|
||||||
|
}
|
||||||
|
|
||||||
pub fn all() -> Result<impl Iterator<Item = Self>, Error> {
|
pub fn all() -> Result<impl Iterator<Item = Self>, Error> {
|
||||||
let iter = dataset_dir().read_dir()?.flat_map(|path| {
|
let files = glob::glob(format!("{}*", Self::files_jsonl().display()).as_str()).unwrap();
|
||||||
let path = path.unwrap().path();
|
let iter = files.filter_map(|x| x.ok()).flat_map(|path| {
|
||||||
let fp = BufReader::new(File::open(path).unwrap());
|
let fp = BufReader::new(File::open(path).unwrap());
|
||||||
let reader = JsonLinesReader::new(fp);
|
let reader = JsonLinesReader::new(fp);
|
||||||
reader.read_all::<SourceFile>().map(|x| x.unwrap())
|
reader.read_all::<SourceFile>().map(|x| x.unwrap())
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ pub fn repositories_dir() -> PathBuf {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dependency_file() -> PathBuf {
|
pub fn dependency_file() -> PathBuf {
|
||||||
repositories_dir().join("deps.json")
|
dataset_dir().join("deps.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn index_dir() -> PathBuf {
|
pub fn index_dir() -> PathBuf {
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,6 @@ tree-sitter-ruby= "0.20.0"
|
||||||
ignore = "0.4.20"
|
ignore = "0.4.20"
|
||||||
kdam = { version = "0.5.0" }
|
kdam = { version = "0.5.0" }
|
||||||
requirements = "0.3.0"
|
requirements = "0.3.0"
|
||||||
serde.workspace = true
|
|
||||||
serdeconv.workspace = true
|
serdeconv.workspace = true
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,7 @@ pub fn create_dataset(config: &Config) -> Result<()> {
|
||||||
fs::remove_dir_all(dataset_dir()).ok();
|
fs::remove_dir_all(dataset_dir()).ok();
|
||||||
fs::create_dir_all(dataset_dir())?;
|
fs::create_dir_all(dataset_dir())?;
|
||||||
let mut writer = FileRotate::new(
|
let mut writer = FileRotate::new(
|
||||||
dataset_dir().join("data.jsonl"),
|
SourceFile::files_jsonl(),
|
||||||
AppendCount::new(usize::max_value()),
|
AppendCount::new(usize::max_value()),
|
||||||
ContentLimit::Lines(1000),
|
ContentLimit::Lines(1000),
|
||||||
Compression::None,
|
Compression::None,
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
use std::{collections::HashSet, path::Path};
|
use std::{collections::HashSet, path::Path};
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use serde::Deserialize;
|
|
||||||
use tabby_common::{Dependency, DependencyFile};
|
use tabby_common::{Dependency, DependencyFile};
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
|
|
@ -10,14 +9,6 @@ pub fn collect(path: &Path, file: &mut DependencyFile) {
|
||||||
file.deps.append(&mut deps);
|
file.deps.append(&mut deps);
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Ok(mut deps) = process_lock_file(path, "poetry.lock", "python") {
|
|
||||||
file.deps.append(&mut deps);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Ok(mut deps) = process_lock_file(path, "Cargo.lock", "rust") {
|
|
||||||
file.deps.append(&mut deps);
|
|
||||||
}
|
|
||||||
|
|
||||||
remove_duplicates(file);
|
remove_duplicates(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -46,31 +37,6 @@ fn process_requirements_txt(path: &Path) -> Result<Vec<Dependency>> {
|
||||||
Ok(deps)
|
Ok(deps)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
|
||||||
struct LockFileDependency {
|
|
||||||
name: String,
|
|
||||||
version: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
|
||||||
struct LockFile {
|
|
||||||
package: Vec<LockFileDependency>,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn process_lock_file(path: &Path, filename: &str, language: &str) -> Result<Vec<Dependency>> {
|
|
||||||
let poetry_lock = path.join(filename);
|
|
||||||
let deps: LockFile = serdeconv::from_toml_file(poetry_lock)?;
|
|
||||||
Ok(deps
|
|
||||||
.package
|
|
||||||
.into_iter()
|
|
||||||
.map(|x| Dependency {
|
|
||||||
language: language.to_string(),
|
|
||||||
name: x.name,
|
|
||||||
version: Some(x.version),
|
|
||||||
})
|
|
||||||
.collect())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn remove_duplicates(file: &mut DependencyFile) {
|
fn remove_duplicates(file: &mut DependencyFile) {
|
||||||
let mut keys: HashSet<(String, String)> = HashSet::default();
|
let mut keys: HashSet<(String, String)> = HashSet::default();
|
||||||
let mut deps = vec![];
|
let mut deps = vec![];
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue