tabby/crates/tabby-scheduler/src/index.rs

84 lines
2.5 KiB
Rust
Raw Normal View History

use std::fs::{self, read_to_string};
use anyhow::Result;
use tabby_common::{
config::{Config, Repository},
path::index_dir,
};
use tantivy::{
directory::MmapDirectory,
doc,
schema::{Schema, STORED, STRING, TEXT},
Index, IndexWriter,
};
use tracing::{info, warn};
use walkdir::{DirEntry, WalkDir};
trait RepositoryExt {
fn index(&self, schema: &Schema, writer: &mut IndexWriter);
}
impl RepositoryExt for Repository {
fn index(&self, schema: &Schema, writer: &mut IndexWriter) {
let git_url = schema.get_field("git_url").unwrap();
let filepath = schema.get_field("filepath").unwrap();
let content = schema.get_field("content").unwrap();
let dir = self.dir();
info!("Start indexing repository {}", self.git_url);
let walk_dir = WalkDir::new(dir.as_path())
.into_iter()
.filter_entry(is_not_hidden)
.filter_map(Result::ok)
.filter(|e| !e.file_type().is_dir());
for entry in walk_dir {
let relative_path = entry.path().strip_prefix(dir.as_path()).unwrap();
if let Ok(file_content) = read_to_string(entry.path()) {
info!("Indexing {:?}", relative_path);
writer
.add_document(doc!(
git_url => self.git_url.clone(),
filepath => relative_path.display().to_string(),
content => file_content,
))
.unwrap();
} else {
warn!("Skip {:?}", relative_path);
}
}
}
}
fn is_not_hidden(entry: &DirEntry) -> bool {
entry
.file_name()
.to_str()
.map(|s| entry.depth() == 0 || !s.starts_with('.'))
.unwrap_or(false)
}
fn create_schema() -> Schema {
let mut builder = Schema::builder();
builder.add_text_field("git_url", STRING | STORED);
builder.add_text_field("filepath", STRING | STORED);
builder.add_text_field("content", TEXT | STORED);
builder.build()
}
pub fn index_repositories(config: &Config) {
let schema = create_schema();
fs::create_dir_all(index_dir()).unwrap();
let directory = MmapDirectory::open(index_dir()).unwrap();
let index = Index::open_or_create(directory, schema.clone()).unwrap();
let mut writer = index.writer(10_000_000).unwrap();
writer.delete_all_documents().unwrap();
for repository in config.repositories.as_slice() {
repository.index(&schema, &mut writer);
}
writer.commit().unwrap();
}