fix: revisit language stop words support

r0.3
Meng Zhang 2023-10-13 13:40:55 -07:00
parent 9c3d8138b2
commit ac73cbb6f9
1 changed file with 34 additions and 18 deletions

View File

@ -27,29 +27,47 @@ lazy_static! {
stop_words: &DEFAULT, stop_words: &DEFAULT,
line_comment: "#" line_comment: "#"
}; };
/* Python */
static ref PYTHON_STOP_WORDS: Vec<&'static str> = static ref PYTHON_STOP_WORDS: Vec<&'static str> =
vec!["\ndef", "\n#", "\nfrom", "\nclass"].with_default(); vec!["\ndef", "\n#", "\nfrom", "\nclass", "\nimport"].with_default();
static ref PYTHON: Language = Language { static ref PYTHON: Language = Language {
stop_words: &PYTHON_STOP_WORDS, stop_words: &PYTHON_STOP_WORDS,
line_comment: "#", line_comment: "#",
}; };
static ref RUST_STOP_WORDS: Vec<&'static str> =
vec!["\ndef", "\n#", "\nfrom", "\nclass"].with_default(); /* Rust */
// Stop words for Rust: every entry is a newline immediately followed by a
// token that opens a line comment or a new item/declaration. Each token is
// listed exactly once.
// NOTE(review): "\ntrait" used to appear twice here; the second occurrence may
// have been meant as another keyword (e.g. "\nstruct") — confirm with the author.
static ref RUST_STOP_WORDS: Vec<&'static str> = vec![
    "\n//", "\nfn", "\ntrait", "\nimpl", "\nenum", "\npub", "\nextern", "\nstatic",
    "\nunsafe", "\nuse"
]
.with_default();
static ref RUST: Language = Language { static ref RUST: Language = Language {
stop_words: &RUST_STOP_WORDS, stop_words: &RUST_STOP_WORDS,
line_comment: "//", line_comment: "//",
}; };
static ref JAVASCRIPT_STOP_WORDS: Vec<&'static str> =
vec!["\ndef", "\n#", "\nfrom", "\nclass"].with_default(); /* Javascript / Typescript */
static ref JAVASCRIPT: Language = Language { static ref JAVASCRIPT_TYPESCRIPT_STOP_WORDS: Vec<&'static str> = vec![
stop_words: &JAVASCRIPT_STOP_WORDS, "\n//",
line_comment: "", "\nabstract",
}; "\nasync",
static ref TYPESCRIPT_STOP_WORDS: Vec<&'static str> = "\nclass",
vec!["\ndef", "\n#", "\nfrom", "\nclass"].with_default(); "\nconst",
static ref TYPESCRIPT: Language = Language { "\nexport",
stop_words: &TYPESCRIPT_STOP_WORDS, "\nfunction",
line_comment: "", "\ninterface",
"\nmodule",
"\npackage",
"\ntype",
"\nvar",
"\nenum",
"\nlet",
]
.with_default();
static ref JAVASCRIPT_TYPESCRIPT: Language = Language {
stop_words: &JAVASCRIPT_TYPESCRIPT_STOP_WORDS,
line_comment: "//",
}; };
} }
@ -58,10 +76,8 @@ pub fn get_language(language: &str) -> &'static Language {
&PYTHON &PYTHON
} else if language == "rust" { } else if language == "rust" {
&RUST &RUST
} else if language == "javascript" { } else if language == "javascript" || language == "typescript" {
&JAVASCRIPT &JAVASCRIPT_TYPESCRIPT
} else if language == "typescript" {
&TYPESCRIPT
} else { } else {
&UNKONWN &UNKONWN
} }