2023-11-18 23:17:54 +00:00
|
|
|
pub mod api;
|
2023-06-05 03:08:43 +00:00
|
|
|
pub mod config;
|
2023-10-06 18:54:12 +00:00
|
|
|
pub mod index;
|
2023-10-16 00:24:44 +00:00
|
|
|
pub mod languages;
|
2023-05-30 06:39:02 +00:00
|
|
|
pub mod path;
|
2023-11-02 23:01:04 +00:00
|
|
|
pub mod registry;
|
2023-08-09 07:31:13 +00:00
|
|
|
pub mod usage;
|
2023-06-22 02:48:13 +00:00
|
|
|
|
|
|
|
|
use std::{
|
|
|
|
|
fs::File,
|
|
|
|
|
io::{BufReader, Error},
|
|
|
|
|
ops::Range,
|
2023-11-18 08:50:16 +00:00
|
|
|
path::PathBuf,
|
2023-06-22 02:48:13 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
use path::dataset_dir;
|
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
|
use serde_jsonlines::JsonLinesReader;
|
|
|
|
|
|
|
|
|
|
#[derive(Serialize, Deserialize)]
|
2023-10-04 23:27:19 +00:00
|
|
|
pub struct SourceFile {
|
2023-06-22 02:48:13 +00:00
|
|
|
pub git_url: String,
|
|
|
|
|
pub filepath: String,
|
|
|
|
|
pub content: String,
|
|
|
|
|
pub language: String,
|
|
|
|
|
pub max_line_length: usize,
|
|
|
|
|
pub avg_line_length: f32,
|
|
|
|
|
pub alphanum_fraction: f32,
|
|
|
|
|
pub tags: Vec<Tag>,
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-04 23:27:19 +00:00
|
|
|
impl SourceFile {
|
2023-11-18 08:50:16 +00:00
|
|
|
pub fn files_jsonl() -> PathBuf {
|
|
|
|
|
dataset_dir().join("files.jsonl")
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-22 02:48:13 +00:00
|
|
|
pub fn all() -> Result<impl Iterator<Item = Self>, Error> {
|
2023-11-18 08:50:16 +00:00
|
|
|
let files = glob::glob(format!("{}*", Self::files_jsonl().display()).as_str()).unwrap();
|
|
|
|
|
let iter = files.filter_map(|x| x.ok()).flat_map(|path| {
|
2023-06-22 02:48:13 +00:00
|
|
|
let fp = BufReader::new(File::open(path).unwrap());
|
|
|
|
|
let reader = JsonLinesReader::new(fp);
|
2023-10-04 23:27:19 +00:00
|
|
|
reader.read_all::<SourceFile>().map(|x| x.unwrap())
|
2023-06-22 02:48:13 +00:00
|
|
|
});
|
|
|
|
|
Ok(iter)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Serialize, Deserialize)]
|
|
|
|
|
pub struct Tag {
|
|
|
|
|
pub range: Range<usize>,
|
|
|
|
|
pub name_range: Range<usize>,
|
|
|
|
|
pub line_range: Range<usize>,
|
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
|
|
|
pub docs: Option<String>,
|
|
|
|
|
pub is_definition: bool,
|
|
|
|
|
pub syntax_type_name: String,
|
|
|
|
|
}
|
2023-11-18 08:05:48 +00:00
|
|
|
|
|
|
|
|
#[derive(Default, Serialize, Deserialize, Clone)]
|
|
|
|
|
pub struct Dependency {
|
|
|
|
|
pub language: String,
|
|
|
|
|
pub name: String,
|
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
|
|
|
pub version: Option<String>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Default, Serialize, Deserialize)]
|
|
|
|
|
pub struct DependencyFile {
|
|
|
|
|
pub deps: Vec<Dependency>,
|
|
|
|
|
}
|