feat: implement basic dependency detection for python / rust (#825)
* refactor: extract tags mod for dataset.rs
* feat: implement basic dependency detection for python / rust
release-fix-intellij-update-support-version-range
parent
5e7ca4f569
commit
1fe0922c72
|
|
@ -1723,6 +1723,16 @@ dependencies = [
|
|||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "globwalk"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9db17aec586697a93219b19726b5b68307eba92898c34b170857343fe67c99d"
|
||||
dependencies = [
|
||||
"ignore",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gloo-timers"
|
||||
version = "0.2.6"
|
||||
|
|
@ -3029,6 +3039,51 @@ version = "2.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
|
||||
|
||||
[[package]]
|
||||
name = "pest"
|
||||
version = "2.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae9cee2a55a544be8b89dc6848072af97a20f2422603c10865be2a42b580fff5"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"thiserror",
|
||||
"ucd-trie",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest_derive"
|
||||
version = "2.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81d78524685f5ef2a3b3bd1cafbc9fcabb036253d9b1463e726a91cd16e2dfc2"
|
||||
dependencies = [
|
||||
"pest",
|
||||
"pest_generator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest_generator"
|
||||
version = "2.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68bd1206e71118b5356dae5ddc61c8b11e28b09ef6a31acbd15ea48a28e0c227"
|
||||
dependencies = [
|
||||
"pest",
|
||||
"pest_meta",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.28",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pest_meta"
|
||||
version = "2.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c747191d4ad9e4a4ab9c8798f1e82a39affe7ef9648390b7e5548d18e099de6"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"pest",
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "petgraph"
|
||||
version = "0.6.3"
|
||||
|
|
@ -3444,6 +3499,19 @@ version = "0.8.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56d84fdd47036b038fc80dd333d10b6aab10d5d31f4a366e20014def75328d33"
|
||||
|
||||
[[package]]
|
||||
name = "requirements"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2643e903f79d8e6bc310ee0def974d12a33561d14e0728511b6ba5e8be0791c3"
|
||||
dependencies = [
|
||||
"globwalk",
|
||||
"pest",
|
||||
"pest_derive",
|
||||
"regex",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.11.22"
|
||||
|
|
@ -4440,8 +4508,11 @@ dependencies = [
|
|||
"job_scheduler",
|
||||
"kdam",
|
||||
"lazy_static",
|
||||
"requirements",
|
||||
"serde",
|
||||
"serde-jsonlines",
|
||||
"serde_json",
|
||||
"serdeconv",
|
||||
"tabby-common",
|
||||
"tantivy",
|
||||
"temp_testdir",
|
||||
|
|
@ -5399,6 +5470,12 @@ version = "1.16.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
|
||||
|
||||
[[package]]
|
||||
name = "ucd-trie"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "2.7.0"
|
||||
|
|
|
|||
|
|
@ -49,3 +49,16 @@ pub struct Tag {
|
|||
pub is_definition: bool,
|
||||
pub syntax_type_name: String,
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize, Deserialize, Clone)]
|
||||
pub struct Dependency {
|
||||
pub language: String,
|
||||
pub name: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub version: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize, Deserialize)]
|
||||
pub struct DependencyFile {
|
||||
pub deps: Vec<Dependency>,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,10 @@ pub fn repositories_dir() -> PathBuf {
|
|||
tabby_root().join("repositories")
|
||||
}
|
||||
|
||||
pub fn dependency_file() -> PathBuf {
|
||||
repositories_dir().join("deps.json")
|
||||
}
|
||||
|
||||
pub fn index_dir() -> PathBuf {
|
||||
tabby_root().join("index")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,9 @@ tree-sitter-go = "0.20.0"
|
|||
tree-sitter-ruby= "0.20.0"
|
||||
ignore = "0.4.20"
|
||||
kdam = { version = "0.5.0" }
|
||||
requirements = "0.3.0"
|
||||
serde.workspace = true
|
||||
serdeconv.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
temp_testdir = "0.2"
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
mod deps;
|
||||
mod tags;
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
ffi::OsStr,
|
||||
|
|
@ -13,11 +16,11 @@ use lazy_static::lazy_static;
|
|||
use serde_jsonlines::WriteExt;
|
||||
use tabby_common::{
|
||||
config::{Config, RepositoryConfig},
|
||||
path::dataset_dir,
|
||||
SourceFile,
|
||||
path::{dataset_dir, dependency_file},
|
||||
DependencyFile, SourceFile,
|
||||
};
|
||||
use tracing::error;
|
||||
use tree_sitter_tags::{TagsConfiguration, TagsContext};
|
||||
use tree_sitter_tags::TagsContext;
|
||||
|
||||
use crate::utils::tqdm;
|
||||
|
||||
|
|
@ -92,10 +95,14 @@ pub fn create_dataset(config: &Config) -> Result<()> {
|
|||
None,
|
||||
);
|
||||
|
||||
let mut deps = DependencyFile::default();
|
||||
for repository in config.repositories.as_slice() {
|
||||
deps::collect(repository.dir().as_path(), &mut deps);
|
||||
repository.create_dataset(&mut writer)?;
|
||||
}
|
||||
|
||||
serdeconv::to_json_file(&deps, dependency_file())?;
|
||||
|
||||
writer.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -135,47 +142,6 @@ mod metrics {
|
|||
}
|
||||
}
|
||||
|
||||
mod tags {
|
||||
use tabby_common::Tag;
|
||||
use tree_sitter_tags::TagsContext;
|
||||
|
||||
use super::LANGUAGE_TAGS;
|
||||
|
||||
pub fn collect(context: &mut TagsContext, language: &str, content: &str) -> Vec<Tag> {
|
||||
let config = LANGUAGE_TAGS.get(language);
|
||||
let empty = Vec::new();
|
||||
|
||||
let Some(config) = config else {
|
||||
return empty;
|
||||
};
|
||||
|
||||
let Ok((tags, has_error)) = context.generate_tags(&config.0, content.as_bytes(), None)
|
||||
else {
|
||||
return empty;
|
||||
};
|
||||
|
||||
if has_error {
|
||||
return empty;
|
||||
}
|
||||
|
||||
tags.filter_map(|x| x.ok())
|
||||
.map(|x| Tag {
|
||||
range: x.range,
|
||||
name_range: x.name_range,
|
||||
line_range: x.line_range,
|
||||
docs: x.docs,
|
||||
is_definition: x.is_definition,
|
||||
syntax_type_name: config.0.syntax_type_name(x.syntax_type_id).to_owned(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
// Mark TagsConfiguration as thread sync / safe.
|
||||
struct TagsConfigurationSync(TagsConfiguration);
|
||||
unsafe impl Send for TagsConfigurationSync {}
|
||||
unsafe impl Sync for TagsConfigurationSync {}
|
||||
|
||||
lazy_static! {
|
||||
static ref LANGUAGE_EXTENSION: HashMap<&'static str, Vec<&'static str>> = {
|
||||
HashMap::from([
|
||||
|
|
@ -222,74 +188,4 @@ lazy_static! {
|
|||
|
||||
map
|
||||
};
|
||||
static ref LANGUAGE_TAGS: HashMap<&'static str, TagsConfigurationSync> = {
|
||||
HashMap::from([
|
||||
(
|
||||
"python",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_python::language(),
|
||||
tree_sitter_python::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"rust",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_rust::language(),
|
||||
tree_sitter_rust::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"java",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_java::language(),
|
||||
tree_sitter_java::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"javascript-typescript",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_typescript::language_tsx(),
|
||||
include_str!("../queries/tsx.scm"),
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"go",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_go::language(),
|
||||
include_str!("../queries/go.scm"),
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"ruby",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_ruby::language(),
|
||||
tree_sitter_ruby::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
])
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,86 @@
|
|||
use std::{collections::HashSet, path::Path};
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::Deserialize;
|
||||
use tabby_common::{Dependency, DependencyFile};
|
||||
use tracing::warn;
|
||||
|
||||
pub fn collect(path: &Path, file: &mut DependencyFile) {
|
||||
if let Ok(mut deps) = process_requirements_txt(path) {
|
||||
file.deps.append(&mut deps);
|
||||
}
|
||||
|
||||
if let Ok(mut deps) = process_lock_file(path, "poetry.lock", "python") {
|
||||
file.deps.append(&mut deps);
|
||||
}
|
||||
|
||||
if let Ok(mut deps) = process_lock_file(path, "Cargo.lock", "rust") {
|
||||
file.deps.append(&mut deps);
|
||||
}
|
||||
|
||||
remove_duplicates(file);
|
||||
}
|
||||
|
||||
fn process_requirements_txt(path: &Path) -> Result<Vec<Dependency>> {
|
||||
let requirements_txt = path.join("requirements.txt");
|
||||
let content = std::fs::read_to_string(requirements_txt)?;
|
||||
|
||||
let mut deps = vec![];
|
||||
match requirements::parse_str(&content) {
|
||||
Ok(requirements) => {
|
||||
for requirement in requirements {
|
||||
if let Some(name) = requirement.name {
|
||||
deps.push(Dependency {
|
||||
language: "python".to_owned(),
|
||||
name,
|
||||
version: None, // requirements.txt doesn't come with accurate version information.
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("Failed to parse requirements.txt: {}", err);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(deps)
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct LockFileDependency {
|
||||
name: String,
|
||||
version: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct LockFile {
|
||||
package: Vec<LockFileDependency>,
|
||||
}
|
||||
|
||||
fn process_lock_file(path: &Path, filename: &str, language: &str) -> Result<Vec<Dependency>> {
|
||||
let poetry_lock = path.join(filename);
|
||||
let deps: LockFile = serdeconv::from_toml_file(poetry_lock)?;
|
||||
Ok(deps
|
||||
.package
|
||||
.into_iter()
|
||||
.map(|x| Dependency {
|
||||
language: language.to_string(),
|
||||
name: x.name,
|
||||
version: Some(x.version),
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn remove_duplicates(file: &mut DependencyFile) {
|
||||
let mut keys: HashSet<(String, String)> = HashSet::default();
|
||||
let mut deps = vec![];
|
||||
for x in &file.deps {
|
||||
let key = (x.language.clone(), x.name.clone());
|
||||
if !keys.contains(&key) {
|
||||
keys.insert(key);
|
||||
deps.push(x.clone());
|
||||
}
|
||||
}
|
||||
|
||||
file.deps = deps;
|
||||
}
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use tabby_common::Tag;
|
||||
use tree_sitter_tags::{TagsConfiguration, TagsContext};
|
||||
|
||||
pub fn collect(context: &mut TagsContext, language: &str, content: &str) -> Vec<Tag> {
|
||||
let config = LANGUAGE_TAGS.get(language);
|
||||
let empty = Vec::new();
|
||||
|
||||
let Some(config) = config else {
|
||||
return empty;
|
||||
};
|
||||
|
||||
let Ok((tags, has_error)) = context.generate_tags(&config.0, content.as_bytes(), None) else {
|
||||
return empty;
|
||||
};
|
||||
|
||||
if has_error {
|
||||
return empty;
|
||||
}
|
||||
|
||||
tags.filter_map(|x| x.ok())
|
||||
.map(|x| Tag {
|
||||
range: x.range,
|
||||
name_range: x.name_range,
|
||||
line_range: x.line_range,
|
||||
docs: x.docs,
|
||||
is_definition: x.is_definition,
|
||||
syntax_type_name: config.0.syntax_type_name(x.syntax_type_id).to_owned(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// Wrapper that marks `TagsConfiguration` as `Send` + `Sync` so it can be stored in the `lazy_static!` map below.
|
||||
struct TagsConfigurationSync(TagsConfiguration);
|
||||
unsafe impl Send for TagsConfigurationSync {}
|
||||
unsafe impl Sync for TagsConfigurationSync {}
|
||||
|
||||
lazy_static! {
    /// Tagging configuration per supported language, keyed by language name.
    ///
    /// Every configuration is built with an empty locals query; a tagging
    /// query that fails to build panics on first access.
    static ref LANGUAGE_TAGS: HashMap<&'static str, TagsConfigurationSync> = {
        // Builds one wrapped configuration from a grammar and its tagging query.
        let configure = |language, tags_query| {
            TagsConfigurationSync(TagsConfiguration::new(language, tags_query, "").unwrap())
        };

        HashMap::from([
            (
                "python",
                configure(
                    tree_sitter_python::language(),
                    tree_sitter_python::TAGGING_QUERY,
                ),
            ),
            (
                "rust",
                configure(
                    tree_sitter_rust::language(),
                    tree_sitter_rust::TAGGING_QUERY,
                ),
            ),
            (
                "java",
                configure(
                    tree_sitter_java::language(),
                    tree_sitter_java::TAGGING_QUERY,
                ),
            ),
            (
                "javascript-typescript",
                configure(
                    tree_sitter_typescript::language_tsx(),
                    include_str!("../../queries/tsx.scm"),
                ),
            ),
            (
                "go",
                configure(
                    tree_sitter_go::language(),
                    include_str!("../../queries/go.scm"),
                ),
            ),
            (
                "ruby",
                configure(
                    tree_sitter_ruby::language(),
                    tree_sitter_ruby::TAGGING_QUERY,
                ),
            ),
        ])
    };
}
|
||||
Loading…
Reference in New Issue