feat: map js, ts, jsx and tsx to js-ts as a unified language [TAB-181] (#386)

* feat: reduce js, ts, jsx and tsx to js-ts

* chore: refactor and add language reducing to both indexing and dataset jobs

* chore: only reduce language in dataset job

* chore: only reduce language in index job

* chore: fix lint

* chore: resolve comments
release-v0.1
vodkaslime 2023-09-01 01:21:39 +08:00 committed by GitHub
parent e436d63df9
commit 90aadad3ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 11 additions and 1 deletion

View File

@ -46,10 +46,12 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
continue;
}
}
let language = reduce_language_if_needed(&doc.language);
writer.add_document(doc!(
field_git_url => doc.git_url.clone(),
field_filepath => doc.filepath.clone(),
field_language => doc.language.clone(),
field_language => language,
field_name => name,
field_body => body,
field_kind => tag.syntax_type_name,
@ -62,6 +64,14 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
Ok(())
}
/// Collapses the JavaScript/TypeScript family of language names into one label.
///
/// Returns `"javascript-typescript"` when `language` is exactly `"javascript"`,
/// `"jsx"`, `"typescript"` or `"tsx"` (the comparison is case-sensitive).
/// Any other name is handed back unchanged, borrowed from the input.
fn reduce_language_if_needed(language: &str) -> &str {
    match language {
        "javascript" | "jsx" | "typescript" | "tsx" => "javascript-typescript",
        other => other,
    }
}
lazy_static! {
static ref LANGUAGE_NAME_BLACKLIST: HashMap<&'static str, Vec<&'static str>> =
HashMap::from([("python", vec!["__init__"])]);