Patch summary (free text ahead of the first "diff --git" header).

  * Cargo.lock, crates/tabby-scheduler/Cargo.toml:
      drop the tree-sitter-java dependency.
  * crates/tabby-scheduler/src/dataset.rs:
      give "tsx" its own entry instead of listing it under "typescript";
      LANGUAGE_TAGS keeps only the "python" tagging configuration.
  * crates/tabby-scheduler/src/index.rs:
      register the first two schema fields as "name" and "body" (the local
      bindings are still called git_url / filepath); add a per-tag loop that
      checks name length, body line count and LANGUAGE_NAME_BLACKLIST.

NOTE(review): within the visible hunk the per-tag loop only `continue`s; no
visible code consumes a tag that passes the checks. Confirm this is
intentional.
NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Record order, +/- markers and hunk counts are
preserved; leading whitespace inside records is reconstructed from Cargo and
rustfmt conventions. Use whitespace-insensitive matching if it fails to apply.

diff --git a/Cargo.lock b/Cargo.lock
index 6d90f43..996076e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2847,7 +2847,6 @@ dependencies = [
  "temp_testdir",
  "tracing",
  "tracing-test",
- "tree-sitter-java",
  "tree-sitter-javascript",
  "tree-sitter-python",
  "tree-sitter-tags",
@@ -3507,16 +3506,6 @@ dependencies = [
  "regex",
 ]
 
-[[package]]
-name = "tree-sitter-java"
-version = "0.20.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0bf5d3f508cbffcbfe1805834101c0d24297a8b6c2184ad9c595556c46d2420"
-dependencies = [
- "cc",
- "tree-sitter",
-]
-
 [[package]]
 name = "tree-sitter-javascript"
 version = "0.20.0"
diff --git a/crates/tabby-scheduler/Cargo.toml b/crates/tabby-scheduler/Cargo.toml
index c72adc4..96060ff 100644
--- a/crates/tabby-scheduler/Cargo.toml
+++ b/crates/tabby-scheduler/Cargo.toml
@@ -19,7 +19,6 @@ lazy_static = { workspace = true }
 serde = { workspace = true }
 serde-jsonlines = { workspace = true }
 file-rotate = "0.7.5"
-tree-sitter-java = "0.20.0"
 tree-sitter-python = "0.20.2"
 
 [dev-dependencies]
diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs
index 87d5196..7f613ad 100644
--- a/crates/tabby-scheduler/src/dataset.rs
+++ b/crates/tabby-scheduler/src/dataset.rs
@@ -207,7 +207,8 @@ lazy_static! {
             ("sql", vec!["sql"]),
             ("scala", vec!["scala"]),
             ("shellscript", vec!["sh", "bash", "command", "zsh"]),
-            ("typescript", vec!["ts", "tsx"]),
+            ("typescript", vec!["ts"]),
+            ("tsx", vec!["tsx"]),
             ("tex", vec!["tex"]),
             ("vb", vec!["vb"]),
         ])
@@ -223,40 +224,16 @@ lazy_static! {
         map
     };
     static ref LANGUAGE_TAGS: HashMap<&'static str, TagsConfigurationSync> = {
-        HashMap::from([
-            (
-                "javascript",
-                TagsConfigurationSync(
-                    TagsConfiguration::new(
-                        tree_sitter_javascript::language(),
-                        tree_sitter_javascript::TAGGING_QUERY,
-                        tree_sitter_javascript::LOCALS_QUERY,
-                    )
-                    .unwrap(),
-                ),
+        HashMap::from([(
+            "python",
+            TagsConfigurationSync(
+                TagsConfiguration::new(
+                    tree_sitter_python::language(),
+                    tree_sitter_python::TAGGING_QUERY,
+                    "",
+                )
+                .unwrap(),
             ),
-            (
-                "python",
-                TagsConfigurationSync(
-                    TagsConfiguration::new(
-                        tree_sitter_python::language(),
-                        tree_sitter_python::TAGGING_QUERY,
-                        "",
-                    )
-                    .unwrap(),
-                ),
-            ),
-            (
-                "java",
-                TagsConfigurationSync(
-                    TagsConfiguration::new(
-                        tree_sitter_java::language(),
-                        tree_sitter_java::TAGGING_QUERY,
-                        "",
-                    )
-                    .unwrap(),
-                ),
-            ),
-        ])
+        )])
     };
 }
diff --git a/crates/tabby-scheduler/src/index.rs b/crates/tabby-scheduler/src/index.rs
index 4e2d65b..7083f49 100644
--- a/crates/tabby-scheduler/src/index.rs
+++ b/crates/tabby-scheduler/src/index.rs
@@ -1,6 +1,7 @@
-use std::fs;
+use std::{collections::HashMap, fs};
 
 use anyhow::Result;
+use lazy_static::lazy_static;
 use tabby_common::{config::Config, path::index_dir, Document};
 use tantivy::{
     directory::MmapDirectory,
@@ -11,8 +12,8 @@ use tantivy::{
 
 pub fn index_repositories(_config: &Config) -> Result<()> {
     let mut builder = Schema::builder();
-    let git_url = builder.add_text_field("git_url", STRING | STORED);
-    let filepath = builder.add_text_field("filepath", STRING | STORED);
+    let git_url = builder.add_text_field("name", STRING | STORED);
+    let filepath = builder.add_text_field("body", STRING | STORED);
     let content = builder.add_text_field("content", TEXT | STORED);
     let language = builder.add_text_field("language", TEXT | STORED);
     let schema = builder.build();
@@ -24,6 +25,24 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
     writer.delete_all_documents()?;
 
     for doc in Document::all()? {
+        for tag in doc.tags {
+            let name = doc.content.get(tag.name_range).unwrap();
+            if name.len() < 5 {
+                continue;
+            }
+
+            let body = doc.content.get(tag.range).unwrap();
+            let count_body_lines = body.lines().count();
+            if !(3..=10).contains(&count_body_lines) {
+                continue;
+            }
+
+            if let Some(blacklist) = LANGUAGE_NAME_BLACKLIST.get(doc.language.as_str()) {
+                if blacklist.contains(&name) {
+                    continue;
+                }
+            }
+        }
         writer.add_document(doc!(
             git_url => doc.git_url,
             filepath => doc.filepath,
@@ -36,3 +55,8 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
 
     Ok(())
 }
+
+lazy_static! {
+    static ref LANGUAGE_NAME_BLACKLIST: HashMap<&'static str, Vec<&'static str>> =
+        HashMap::from([("python", vec!["__init__"])]);
+}