feat: support prefix query on name field (#694)
* feat: support prefix phrase query on name field * update changelog release-notes-05
parent
acb3a33d78
commit
2adcc0726c
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
## Notice
|
## Notice
|
||||||
* llama.cpp backend (CPU, Metal) now requires a redownload of gguf model due to upstream format changes: https://github.com/TabbyML/tabby/pull/645 https://github.com/ggerganov/llama.cpp/pull/3252
|
* llama.cpp backend (CPU, Metal) now requires a redownload of gguf model due to upstream format changes: https://github.com/TabbyML/tabby/pull/645 https://github.com/ggerganov/llama.cpp/pull/3252
|
||||||
|
* Due to indexing format changes, the `~/.tabby/index` needs to be manually removed before any further runs of `tabby scheduler`.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
use tantivy::{
|
use tantivy::{
|
||||||
tokenizer::{RegexTokenizer, RemoveLongFilter, TextAnalyzer},
|
tokenizer::{NgramTokenizer, RegexTokenizer, RemoveLongFilter, TextAnalyzer},
|
||||||
Index,
|
Index,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -8,6 +8,7 @@ pub trait IndexExt {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub static CODE_TOKENIZER: &str = "code";
|
pub static CODE_TOKENIZER: &str = "code";
|
||||||
|
pub static IDENTIFIER_TOKENIZER: &str = "identifier";
|
||||||
|
|
||||||
impl IndexExt for Index {
|
impl IndexExt for Index {
|
||||||
fn register_tokenizer(&self) {
|
fn register_tokenizer(&self) {
|
||||||
|
|
@ -16,5 +17,11 @@ impl IndexExt for Index {
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
self.tokenizers().register(CODE_TOKENIZER, code_tokenizer);
|
self.tokenizers().register(CODE_TOKENIZER, code_tokenizer);
|
||||||
|
|
||||||
|
let identifier_tokenzier =
|
||||||
|
TextAnalyzer::builder(NgramTokenizer::prefix_only(2, 5).unwrap()).build();
|
||||||
|
|
||||||
|
self.tokenizers()
|
||||||
|
.register(IDENTIFIER_TOKENIZER, identifier_tokenzier);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ use std::fs;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use tabby_common::{
|
use tabby_common::{
|
||||||
config::Config,
|
config::Config,
|
||||||
index::{IndexExt, CODE_TOKENIZER},
|
index::{IndexExt, CODE_TOKENIZER, IDENTIFIER_TOKENIZER},
|
||||||
path::index_dir,
|
path::index_dir,
|
||||||
SourceFile,
|
SourceFile,
|
||||||
};
|
};
|
||||||
|
|
@ -29,10 +29,17 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
|
||||||
.set_indexing_options(code_indexing_options)
|
.set_indexing_options(code_indexing_options)
|
||||||
.set_stored();
|
.set_stored();
|
||||||
|
|
||||||
|
let name_indexing_options = TextFieldIndexing::default()
|
||||||
|
.set_tokenizer(IDENTIFIER_TOKENIZER)
|
||||||
|
.set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions);
|
||||||
|
let name_options = TextOptions::default()
|
||||||
|
.set_indexing_options(name_indexing_options)
|
||||||
|
.set_stored();
|
||||||
|
|
||||||
let field_git_url = builder.add_text_field("git_url", STRING | STORED);
|
let field_git_url = builder.add_text_field("git_url", STRING | STORED);
|
||||||
let field_filepath = builder.add_text_field("filepath", STRING | STORED);
|
let field_filepath = builder.add_text_field("filepath", STRING | STORED);
|
||||||
let field_language = builder.add_text_field("language", STRING | STORED);
|
let field_language = builder.add_text_field("language", STRING | STORED);
|
||||||
let field_name = builder.add_text_field("name", STRING | STORED);
|
let field_name = builder.add_text_field("name", name_options);
|
||||||
let field_kind = builder.add_text_field("kind", STRING | STORED);
|
let field_kind = builder.add_text_field("kind", STRING | STORED);
|
||||||
let field_body = builder.add_text_field("body", code_options);
|
let field_body = builder.add_text_field("body", code_options);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue