feat: switching index to tag based (#265)
* add tag based index * update for python * fix lint
sweep/improve-logging-information
parent
be90047477
commit
24d17dc1ed
|
|
@ -2847,7 +2847,6 @@ dependencies = [
|
|||
"temp_testdir",
|
||||
"tracing",
|
||||
"tracing-test",
|
||||
"tree-sitter-java",
|
||||
"tree-sitter-javascript",
|
||||
"tree-sitter-python",
|
||||
"tree-sitter-tags",
|
||||
|
|
@ -3507,16 +3506,6 @@ dependencies = [
|
|||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-java"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0bf5d3f508cbffcbfe1805834101c0d24297a8b6c2184ad9c595556c46d2420"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-javascript"
|
||||
version = "0.20.0"
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ lazy_static = { workspace = true }
|
|||
serde = { workspace = true }
|
||||
serde-jsonlines = { workspace = true }
|
||||
file-rotate = "0.7.5"
|
||||
tree-sitter-java = "0.20.0"
|
||||
tree-sitter-python = "0.20.2"
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
|||
|
|
@ -207,7 +207,8 @@ lazy_static! {
|
|||
("sql", vec!["sql"]),
|
||||
("scala", vec!["scala"]),
|
||||
("shellscript", vec!["sh", "bash", "command", "zsh"]),
|
||||
("typescript", vec!["ts", "tsx"]),
|
||||
("typescript", vec!["ts"]),
|
||||
("tsx", vec!["tsx"]),
|
||||
("tex", vec!["tex"]),
|
||||
("vb", vec!["vb"]),
|
||||
])
|
||||
|
|
@ -223,40 +224,16 @@ lazy_static! {
|
|||
map
|
||||
};
|
||||
static ref LANGUAGE_TAGS: HashMap<&'static str, TagsConfigurationSync> = {
|
||||
HashMap::from([
|
||||
(
|
||||
"javascript",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_javascript::language(),
|
||||
tree_sitter_javascript::TAGGING_QUERY,
|
||||
tree_sitter_javascript::LOCALS_QUERY,
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
HashMap::from([(
|
||||
"python",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_python::language(),
|
||||
tree_sitter_python::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
(
|
||||
"python",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_python::language(),
|
||||
tree_sitter_python::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"java",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_java::language(),
|
||||
tree_sitter_java::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
])
|
||||
)])
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use std::fs;
|
||||
use std::{collections::HashMap, fs};
|
||||
|
||||
use anyhow::Result;
|
||||
use lazy_static::lazy_static;
|
||||
use tabby_common::{config::Config, path::index_dir, Document};
|
||||
use tantivy::{
|
||||
directory::MmapDirectory,
|
||||
|
|
@ -11,8 +12,8 @@ use tantivy::{
|
|||
|
||||
pub fn index_repositories(_config: &Config) -> Result<()> {
|
||||
let mut builder = Schema::builder();
|
||||
let git_url = builder.add_text_field("git_url", STRING | STORED);
|
||||
let filepath = builder.add_text_field("filepath", STRING | STORED);
|
||||
let git_url = builder.add_text_field("name", STRING | STORED);
|
||||
let filepath = builder.add_text_field("body", STRING | STORED);
|
||||
let content = builder.add_text_field("content", TEXT | STORED);
|
||||
let language = builder.add_text_field("language", TEXT | STORED);
|
||||
let schema = builder.build();
|
||||
|
|
@ -24,6 +25,24 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
|
|||
writer.delete_all_documents()?;
|
||||
|
||||
for doc in Document::all()? {
|
||||
for tag in doc.tags {
|
||||
let name = doc.content.get(tag.name_range).unwrap();
|
||||
if name.len() < 5 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let body = doc.content.get(tag.range).unwrap();
|
||||
let count_body_lines = body.lines().count();
|
||||
if !(3..=10).contains(&count_body_lines) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(blacklist) = LANGUAGE_NAME_BLACKLIST.get(doc.language.as_str()) {
|
||||
if blacklist.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.add_document(doc!(
|
||||
git_url => doc.git_url,
|
||||
filepath => doc.filepath,
|
||||
|
|
@ -36,3 +55,8 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref LANGUAGE_NAME_BLACKLIST: HashMap<&'static str, Vec<&'static str>> =
|
||||
HashMap::from([("python", vec!["__init__"])]);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue