pub mod config; pub mod index; pub mod languages; pub mod path; pub mod registry; pub mod usage; use std::{ fs::File, io::{BufReader, Error}, ops::Range, path::PathBuf, }; use path::dataset_dir; use serde::{Deserialize, Serialize}; use serde_jsonlines::JsonLinesReader; #[derive(Serialize, Deserialize)] pub struct SourceFile { pub git_url: String, pub filepath: String, pub content: String, pub language: String, pub max_line_length: usize, pub avg_line_length: f32, pub alphanum_fraction: f32, pub tags: Vec, } impl SourceFile { pub fn files_jsonl() -> PathBuf { dataset_dir().join("files.jsonl") } pub fn all() -> Result, Error> { let files = glob::glob(format!("{}*", Self::files_jsonl().display()).as_str()).unwrap(); let iter = files.filter_map(|x| x.ok()).flat_map(|path| { let fp = BufReader::new(File::open(path).unwrap()); let reader = JsonLinesReader::new(fp); reader.read_all::().map(|x| x.unwrap()) }); Ok(iter) } } #[derive(Serialize, Deserialize)] pub struct Tag { pub range: Range, pub name_range: Range, pub line_range: Range, #[serde(skip_serializing_if = "Option::is_none")] pub docs: Option, pub is_definition: bool, pub syntax_type_name: String, } #[derive(Default, Serialize, Deserialize, Clone)] pub struct Dependency { pub language: String, pub name: String, #[serde(skip_serializing_if = "Option::is_none")] pub version: Option, } #[derive(Default, Serialize, Deserialize)] pub struct DependencyFile { pub deps: Vec, }