63 lines
1.9 KiB
Rust
63 lines
1.9 KiB
Rust
use std::{collections::HashMap, fs};
|
|
|
|
use anyhow::Result;
|
|
use lazy_static::lazy_static;
|
|
use tabby_common::{config::Config, path::index_dir, Document};
|
|
use tantivy::{
|
|
directory::MmapDirectory,
|
|
doc,
|
|
schema::{Schema, STORED, STRING, TEXT},
|
|
Index,
|
|
};
|
|
|
|
pub fn index_repositories(_config: &Config) -> Result<()> {
|
|
let mut builder = Schema::builder();
|
|
let git_url = builder.add_text_field("name", STRING | STORED);
|
|
let filepath = builder.add_text_field("body", STRING | STORED);
|
|
let content = builder.add_text_field("content", TEXT | STORED);
|
|
let language = builder.add_text_field("language", TEXT | STORED);
|
|
let schema = builder.build();
|
|
|
|
fs::create_dir_all(index_dir())?;
|
|
let directory = MmapDirectory::open(index_dir())?;
|
|
let index = Index::open_or_create(directory, schema)?;
|
|
let mut writer = index.writer(10_000_000)?;
|
|
writer.delete_all_documents()?;
|
|
|
|
for doc in Document::all()? {
|
|
for tag in doc.tags {
|
|
let name = doc.content.get(tag.name_range).unwrap();
|
|
if name.len() < 5 {
|
|
continue;
|
|
}
|
|
|
|
let body = doc.content.get(tag.range).unwrap();
|
|
let count_body_lines = body.lines().count();
|
|
if !(3..=10).contains(&count_body_lines) {
|
|
continue;
|
|
}
|
|
|
|
if let Some(blacklist) = LANGUAGE_NAME_BLACKLIST.get(doc.language.as_str()) {
|
|
if blacklist.contains(&name) {
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
writer.add_document(doc!(
|
|
git_url => doc.git_url,
|
|
filepath => doc.filepath,
|
|
content => doc.content,
|
|
language => doc.language,
|
|
))?;
|
|
}
|
|
|
|
writer.commit()?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
lazy_static! {
    /// Tag names to skip, keyed by language name.
    ///
    /// `index_repositories` looks up the document's language here and skips
    /// any tag whose name appears in the matching list.
    static ref LANGUAGE_NAME_BLACKLIST: HashMap<&'static str, Vec<&'static str>> = {
        let mut names_by_language = HashMap::new();
        names_by_language.insert("python", vec!["__init__"]);
        names_by_language
    };
}
|