fix: add additional whitespace to match tokens that combine space and li… (#270)
* fix: add additional whitespace to match tokens that combine space and line break * fix lint sweep/improve-logging-information
parent
af517fb15b
commit
9ca1f7e5f1
|
|
@ -23,7 +23,6 @@ pub async fn scheduler(now: bool) -> Result<()> {
|
|||
let ret = dataset::create_dataset(&config);
|
||||
if let Err(err) = ret {
|
||||
error!("Failed to build dataset, err: '{}'", err);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -3,18 +3,37 @@ use std::collections::HashMap;
|
|||
use lazy_static::lazy_static;
|
||||
|
||||
lazy_static! {
|
||||
static ref DEFAULT: Vec<&'static str> = vec!("\n\n");
|
||||
static ref DEFAULT: Vec<&'static str> = vec![
|
||||
"\n\n",
|
||||
"\n\n ",
|
||||
"\n\n ",
|
||||
"\n\n ",
|
||||
"\n\n ",
|
||||
"\n\n ",
|
||||
"\n\n ",
|
||||
"\n\n ",
|
||||
"\n\n",
|
||||
"\n\n\t",
|
||||
"\n\n\t\t",
|
||||
"\n\n\t\t\t",
|
||||
"\n\n\t\t\t\t",
|
||||
"\n\n\t\t\t\t\t",
|
||||
"\n\n\t\t\t\t\t\t",
|
||||
"\n\n\t\t\t\t\t\t\t",
|
||||
];
|
||||
static ref LANGUAGES: HashMap<&'static str, Vec<&'static str>> = {
|
||||
let mut map = HashMap::new();
|
||||
map.insert("python", vec!["\n\n", "\ndef", "\n#", "\nfrom", "\nclass"]);
|
||||
map.insert(
|
||||
"python",
|
||||
vec!["\ndef", "\n#", "\nfrom", "\nclass"].with_default(),
|
||||
);
|
||||
map.insert(
|
||||
"javascript",
|
||||
vec!["\n\n", "\nfunction", "\n//", "\nimport", "\nclass"],
|
||||
vec!["\nfunction", "\n//", "\nimport", "\nclass"],
|
||||
);
|
||||
map.insert(
|
||||
"typescript",
|
||||
vec![
|
||||
"\n\n",
|
||||
"\nfunction",
|
||||
"\n//",
|
||||
"\nimport",
|
||||
|
|
@ -27,6 +46,18 @@ lazy_static! {
|
|||
};
|
||||
}
|
||||
|
||||
trait WithDefault {
|
||||
fn with_default(self) -> Self;
|
||||
}
|
||||
|
||||
impl WithDefault for Vec<&'static str> {
|
||||
fn with_default(mut self) -> Self {
|
||||
let mut x = DEFAULT.clone();
|
||||
self.append(&mut x);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_stop_words(language: &str) -> &'static Vec<&'static str> {
|
||||
LANGUAGES.get(language).unwrap_or(&DEFAULT)
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue