feat: map js, ts, jsx and tsx to js-ts as unified language [TAB-181] (#386)

* feat: reduce js, ts, jsx and tsx to js-ts

* chore: refactor and add language reducing to both indexing and dataset jobs

* chore: only reduce language in dataset job

* chore: only reduce language in index job

* chore: fix lint

* chore: resolve comments
release-v0.1
vodkaslime 2023-09-01 01:21:39 +08:00 committed by GitHub
parent e436d63df9
commit 90aadad3ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 11 additions and 1 deletion

View File

@@ -46,10 +46,12 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
continue; continue;
} }
} }
let language = reduce_language_if_needed(&doc.language);
writer.add_document(doc!( writer.add_document(doc!(
field_git_url => doc.git_url.clone(), field_git_url => doc.git_url.clone(),
field_filepath => doc.filepath.clone(), field_filepath => doc.filepath.clone(),
field_language => doc.language.clone(), field_language => language,
field_name => name, field_name => name,
field_body => body, field_body => body,
field_kind => tag.syntax_type_name, field_kind => tag.syntax_type_name,
@@ -62,6 +64,14 @@ pub fn index_repositories(_config: &Config) -> Result<()> {
Ok(()) Ok(())
} }
/// Collapses the JavaScript/TypeScript family of language names into one
/// shared label.
///
/// The names `"javascript"`, `"jsx"`, `"typescript"` and `"tsx"` all map to
/// `"javascript-typescript"`. Any other name is returned unchanged. Matching
/// is exact, so differently-cased spellings pass through untouched.
fn reduce_language_if_needed(language: &str) -> &str {
    match language {
        "javascript" | "jsx" | "typescript" | "tsx" => "javascript-typescript",
        other => other,
    }
}
lazy_static! { lazy_static! {
static ref LANGUAGE_NAME_BLACKLIST: HashMap<&'static str, Vec<&'static str>> = static ref LANGUAGE_NAME_BLACKLIST: HashMap<&'static str, Vec<&'static str>> =
HashMap::from([("python", vec!["__init__"])]); HashMap::from([("python", vec!["__init__"])]);