tabby/crates/tabby-common/src/index.rs

28 lines
786 B
Rust

use tantivy::{
tokenizer::{NgramTokenizer, RegexTokenizer, RemoveLongFilter, TextAnalyzer},
Index,
};
/// Extension trait that adds tokenizer registration to an index type.
///
/// In this file it is implemented for tantivy's [`Index`].
pub trait IndexExt {
/// Registers the named tokenizers on `self`.
///
/// The `Index` implementation in this file registers one analyzer under
/// [`CODE_TOKENIZER`] and one under [`IDENTIFIER_TOKENIZER`].
fn register_tokenizer(&self);
}
/// Name under which `register_tokenizer` registers the regex-based (`\w+`)
/// analyzer.
pub static CODE_TOKENIZER: &str = "code";
/// Name under which `register_tokenizer` registers the prefix n-gram
/// analyzer.
pub static IDENTIFIER_TOKENIZER: &str = "identifier";
impl IndexExt for Index {
    /// Registers the two analyzers used by this crate on the index's
    /// tokenizer manager.
    ///
    /// * [`CODE_TOKENIZER`]: splits text into runs of `\w` characters and
    ///   passes the tokens through `RemoveLongFilter::limit(128)`.
    /// * [`IDENTIFIER_TOKENIZER`]: a prefix-only n-gram tokenizer built
    ///   with a minimum gram size of 2 and a maximum of 5.
    ///
    /// # Panics
    ///
    /// Panics if tantivy rejects either tokenizer configuration. Both are
    /// built from constants, so that would indicate a programming error.
    fn register_tokenizer(&self) {
        let manager = self.tokenizers();

        // Word-level analyzer for code.
        let word_splitter = RegexTokenizer::new(r"(?:\w+)").unwrap();
        let code_analyzer = TextAnalyzer::builder(word_splitter)
            .filter(RemoveLongFilter::limit(128))
            .build();
        manager.register(CODE_TOKENIZER, code_analyzer);

        // Prefix n-gram analyzer for identifiers.
        let prefix_ngrams = NgramTokenizer::prefix_only(2, 5).unwrap();
        let identifier_analyzer = TextAnalyzer::builder(prefix_ngrams).build();
        manager.register(IDENTIFIER_TOKENIZER, identifier_analyzer);
    }
}