fix: keep only direct dependency, clean up path (#827)
parent
1fe0922c72
commit
530e142d63
|
|
@ -32,6 +32,12 @@ jobs:
|
|||
toolchain: nightly
|
||||
components: rustfmt, clippy
|
||||
|
||||
- name: Install cargo-machete
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: install
|
||||
args: cargo-machete
|
||||
|
||||
- name: Sccache cache
|
||||
uses: mozilla-actions/sccache-action@v0.0.3
|
||||
with:
|
||||
|
|
@ -48,9 +54,6 @@ jobs:
|
|||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
|
||||
- name: Cargo Machete
|
||||
uses: bnjbvr/cargo-machete@main
|
||||
|
||||
- run: bash ./ci/prepare_build_environment.sh
|
||||
|
||||
- run: make fix
|
||||
|
|
|
|||
|
|
@ -1710,6 +1710,12 @@ version = "0.27.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.13"
|
||||
|
|
@ -4464,6 +4470,7 @@ version = "0.6.0-dev"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"filenamify",
|
||||
"glob",
|
||||
"lazy_static",
|
||||
"reqwest",
|
||||
"serde",
|
||||
|
|
@ -4509,7 +4516,6 @@ dependencies = [
|
|||
"kdam",
|
||||
"lazy_static",
|
||||
"requirements",
|
||||
"serde",
|
||||
"serde-jsonlines",
|
||||
"serde_json",
|
||||
"serdeconv",
|
||||
|
|
|
|||
2
Makefile
2
Makefile
|
|
@ -6,7 +6,7 @@ else
|
|||
endif
|
||||
|
||||
fix:
|
||||
cargo machete --fix
|
||||
cargo machete --fix || true
|
||||
cargo +nightly fmt
|
||||
cargo +nightly clippy --fix --allow-dirty --allow-staged
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ reqwest = { workspace = true, features = [ "json" ] }
|
|||
uuid = { version = "1.4.1", features = ["v4"] }
|
||||
tantivy.workspace = true
|
||||
anyhow.workspace = true
|
||||
glob = "0.3.1"
|
||||
|
||||
[features]
|
||||
testutils = []
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ use std::{
|
|||
fs::File,
|
||||
io::{BufReader, Error},
|
||||
ops::Range,
|
||||
path::PathBuf,
|
||||
};
|
||||
|
||||
use path::dataset_dir;
|
||||
|
|
@ -28,9 +29,13 @@ pub struct SourceFile {
|
|||
}
|
||||
|
||||
impl SourceFile {
|
||||
pub fn files_jsonl() -> PathBuf {
|
||||
dataset_dir().join("files.jsonl")
|
||||
}
|
||||
|
||||
pub fn all() -> Result<impl Iterator<Item = Self>, Error> {
|
||||
let iter = dataset_dir().read_dir()?.flat_map(|path| {
|
||||
let path = path.unwrap().path();
|
||||
let files = glob::glob(format!("{}*", Self::files_jsonl().display()).as_str()).unwrap();
|
||||
let iter = files.filter_map(|x| x.ok()).flat_map(|path| {
|
||||
let fp = BufReader::new(File::open(path).unwrap());
|
||||
let reader = JsonLinesReader::new(fp);
|
||||
reader.read_all::<SourceFile>().map(|x| x.unwrap())
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ pub fn repositories_dir() -> PathBuf {
|
|||
}
|
||||
|
||||
pub fn dependency_file() -> PathBuf {
|
||||
repositories_dir().join("deps.json")
|
||||
dataset_dir().join("deps.json")
|
||||
}
|
||||
|
||||
pub fn index_dir() -> PathBuf {
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ tree-sitter-ruby= "0.20.0"
|
|||
ignore = "0.4.20"
|
||||
kdam = { version = "0.5.0" }
|
||||
requirements = "0.3.0"
|
||||
serde.workspace = true
|
||||
serdeconv.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ pub fn create_dataset(config: &Config) -> Result<()> {
|
|||
fs::remove_dir_all(dataset_dir()).ok();
|
||||
fs::create_dir_all(dataset_dir())?;
|
||||
let mut writer = FileRotate::new(
|
||||
dataset_dir().join("data.jsonl"),
|
||||
SourceFile::files_jsonl(),
|
||||
AppendCount::new(usize::max_value()),
|
||||
ContentLimit::Lines(1000),
|
||||
Compression::None,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
use std::{collections::HashSet, path::Path};
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::Deserialize;
|
||||
use tabby_common::{Dependency, DependencyFile};
|
||||
use tracing::warn;
|
||||
|
||||
|
|
@ -10,14 +9,6 @@ pub fn collect(path: &Path, file: &mut DependencyFile) {
|
|||
file.deps.append(&mut deps);
|
||||
}
|
||||
|
||||
if let Ok(mut deps) = process_lock_file(path, "poetry.lock", "python") {
|
||||
file.deps.append(&mut deps);
|
||||
}
|
||||
|
||||
if let Ok(mut deps) = process_lock_file(path, "Cargo.lock", "rust") {
|
||||
file.deps.append(&mut deps);
|
||||
}
|
||||
|
||||
remove_duplicates(file);
|
||||
}
|
||||
|
||||
|
|
@ -46,31 +37,6 @@ fn process_requirements_txt(path: &Path) -> Result<Vec<Dependency>> {
|
|||
Ok(deps)
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct LockFileDependency {
|
||||
name: String,
|
||||
version: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct LockFile {
|
||||
package: Vec<LockFileDependency>,
|
||||
}
|
||||
|
||||
fn process_lock_file(path: &Path, filename: &str, language: &str) -> Result<Vec<Dependency>> {
|
||||
let poetry_lock = path.join(filename);
|
||||
let deps: LockFile = serdeconv::from_toml_file(poetry_lock)?;
|
||||
Ok(deps
|
||||
.package
|
||||
.into_iter()
|
||||
.map(|x| Dependency {
|
||||
language: language.to_string(),
|
||||
name: x.name,
|
||||
version: Some(x.version),
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn remove_duplicates(file: &mut DependencyFile) {
|
||||
let mut keys: HashSet<(String, String)> = HashSet::default();
|
||||
let mut deps = vec![];
|
||||
|
|
|
|||
Loading…
Reference in New Issue