feat: add api to serve files under repositories (#851)

* feat: add api to serve files under repositories

* resolve comments

* resolve comments

* Update repositories.rs

* Update lib.rs

* resolve comment

---------

Co-authored-by: Meng Zhang <meng@tabbyml.com>
wsxiaoys-patch-3
Eric 2023-11-24 09:58:56 +08:00 committed by GitHub
parent 821ca2dead
commit e78cc1f844
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 382 additions and 3 deletions

8
Cargo.lock generated
View File

@ -4704,6 +4704,8 @@ dependencies = [
"tokio",
"tokio-rusqlite",
"tokio-tungstenite",
"tower",
"tower-http 0.4.0",
"tracing",
"unicase",
"uuid 1.4.1",
@ -5315,10 +5317,16 @@ dependencies = [
"http",
"http-body",
"http-range-header",
"httpdate",
"mime",
"mime_guess",
"percent-encoding",
"pin-project-lite",
"tokio",
"tokio-util",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]

View File

@ -50,13 +50,17 @@ impl RepositoryConfig {
let path = self.git_url.strip_prefix("file://").unwrap();
path.into()
} else {
repositories_dir().join(filenamify(&self.git_url))
repositories_dir().join(self.name())
}
}
/// Reports whether `git_url` points at a local directory, i.e. whether it
/// begins with the `file://` scheme prefix.
pub fn is_local_dir(&self) -> bool {
    self.git_url.strip_prefix("file://").is_some()
}
/// Returns the result of passing `git_url` through `filenamify`.
///
/// For remote repositories this is also the directory name used under
/// `repositories_dir()`.
pub fn name(&self) -> String {
    let url: &str = self.git_url.as_str();
    filenamify(url)
}
}
#[derive(Serialize, Deserialize)]
@ -96,4 +100,12 @@ mod tests {
};
assert!(!repo.is_local_dir());
}
/// `name()` must turn a remote git URL into its `filenamify`-ed form.
#[test]
fn test_repository_config_name() {
    let git_url = "https://github.com/TabbyML/tabby.git".to_owned();
    let repo = RepositoryConfig { git_url };

    let expected = "https_github.com_TabbyML_tabby.git";
    assert_eq!(repo.name(), expected);
}
}

View File

@ -45,7 +45,7 @@ impl SourceFile {
}
}
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Tag {
pub range: Range<usize>,
pub name_range: Range<usize>,

View File

@ -29,6 +29,8 @@ thiserror.workspace = true
tokio = { workspace = true, features = ["fs"] }
tokio-rusqlite = "0.4.0"
tokio-tungstenite = "0.20.1"
tower = { version = "0.4", features = ["util"] }
tower-http = { version = "0.4.0", features = ["fs", "trace"] }
tracing.workspace = true
unicase = "2.7.0"

View File

@ -0,0 +1,143 @@
# API Specs
## Repository API: `/repositories`
### Resolve
Get file or directory content from local repositories
**URL:** `/repositories/{name}/resolve/{path}`
**Method:** `GET`
**Request examples:**
- Get directory content
```shell
curl --request GET \
--url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/resolve/
curl --request GET \
--url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/resolve/ee/tabby-webserver/
```
- Get file content
```shell
curl --request GET \
--url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/resolve/package.json
curl --request GET \
--url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/resolve/ee/tabby-webserver/src/api.rs
```
**Response examples:**
- Every directory query returns a JSON object whose `entries` field is a list of strings, each string being the path of one entry under that directory, relative to the repository root. The `Content-Type` for a directory query is `application/vnd.directory+json`.
For `/repositories/https_github.com_TabbyML_tabby.git/resolve/ee/tabby-webserver/`, the response is:
```json
{
"entries": [
"ee/tabby-webserver/src",
"ee/tabby-webserver/ui",
"ee/tabby-webserver/examples",
"ee/tabby-webserver/Cargo.toml",
"ee/tabby-webserver/graphql"
]
}
```
- A file query returns the file content; the `Content-Type` is guessed from the file extension.
For request `/repositories/https_github.com_TabbyML_tabby.git/resolve/package.json`, the content type is `application/json`, and the response is:
```json
{
"private": true,
"workspaces": [
"clients/tabby-agent",
"clients/vscode",
"clients/vim",
"clients/intellij"
],
"engines": {
"node": ">=18"
}
}
```
For request `/repositories/https_github.com_TabbyML_tabby.git/resolve/ee/tabby-webserver/src/api.rs`, the content type is `text/x-rust`, and the response is:
```text
use async_trait::async_trait;
use juniper::{GraphQLEnum, GraphQLObject};
use serde::{Deserialize, Serialize};
use tabby_common::api::{
code::{CodeSearch, CodeSearchError, SearchResponse},
event::RawEventLogger,
};
use thiserror::Error;
use tokio_tungstenite::connect_async;
use crate::websocket::WebSocketTransport;
#[derive(GraphQLEnum, Serialize, Deserialize, Clone, Debug)]
pub enum WorkerKind {
Completion,
Chat,
}
......omit......
```
### Meta
Get the dataset entry for an indexed file in the repository.
**URL:** `/repositories/{name}/meta/{path}`
**Method:** `GET`
**Request example:**
```shell
curl --request GET \
--url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/meta/ee/tabby-webserver/src/lib.rs
```
**Response example:**
The `Content-Type` for a successful response is always `application/json`.
```json
{
"git_url": "https://github.com/TabbyML/tabby.git",
"filepath": "ee/tabby-webserver/src/lib.rs",
"language": "rust",
"max_line_length": 88,
"avg_line_length": 26.340782,
"alphanum_fraction": 0.56416017,
"tags": [
{
"range": {
"start": 0,
"end": 12
},
"name_range": {
"start": 8,
"end": 11
},
"line_range": {
"start": 0,
"end": 12
},
"is_definition": true,
"syntax_type_name": "module"
},
......omit......
]
}
```

View File

@ -11,6 +11,7 @@ use tracing::{error, warn};
use websocket::WebSocketTransport;
mod db;
mod repositories;
mod server;
mod ui;
mod websocket;
@ -49,7 +50,8 @@ pub async fn attach_webserver(
)
.route("/graphql", routing::get(playground("/graphql", None)))
.layer(Extension(schema))
.route("/hub", routing::get(ws_handler).with_state(ctx));
.route("/hub", routing::get(ws_handler).with_state(ctx))
.nest("/repositories", repositories::routes());
let ui = ui
.route("/graphiql", routing::get(graphiql("/graphql", None)))

View File

@ -0,0 +1,56 @@
mod resolve;
use anyhow::Result;
use axum::{extract::Path, http::StatusCode, response::Response, routing, Json, Router};
use tabby_common::path::repositories_dir;
use tracing::{instrument, warn};
use crate::{
repositories,
repositories::resolve::{resolve_dir, resolve_file, resolve_meta, Meta, ResolveParams},
};
/// Builds the router for the repository endpoints.
///
/// Each handler is registered twice: once for the bare trailing-slash URL,
/// which has no `path` capture, and once with a `*path` wildcard.
pub fn routes() -> Router {
    let router = Router::new();

    // File contents and directory listings.
    let router = router
        .route("/:name/resolve/", routing::get(repositories::resolve))
        .route("/:name/resolve/*path", routing::get(repositories::resolve));

    // Dataset metadata of indexed files.
    router
        .route("/:name/meta/", routing::get(repositories::meta))
        .route("/:name/meta/*path", routing::get(repositories::meta))
}
/// Returns `true` when `name` consists of exactly one ordinary path segment
/// (no separators, no `.`/`..`, no root or drive prefix).
fn is_single_segment(name: &str) -> bool {
    let mut parts = std::path::Path::new(name).components();
    matches!(
        (parts.next(), parts.next()),
        (Some(std::path::Component::Normal(_)), None)
    )
}

/// Returns `true` when `path` is a relative path that cannot lexically climb
/// out of the directory it is joined onto: every component is either an
/// ordinary segment or `.`. The empty path is accepted.
fn is_contained_path(path: &str) -> bool {
    std::path::Path::new(path).components().all(|part| {
        matches!(
            part,
            std::path::Component::Normal(_) | std::path::Component::CurDir
        )
    })
}

/// Serves a file, or a directory listing, from a repository stored under
/// `repositories_dir()`.
///
/// Responds with `404 Not Found` when the repository name or the requested
/// path contains `..`, an absolute prefix, or (for the name) a path separator,
/// and with `500 Internal Server Error` when the underlying resolver fails.
#[instrument(skip(repo))]
async fn resolve(Path(repo): Path<ResolveParams>) -> Result<Response, StatusCode> {
    // The captures arrive percent-decoded, so an encoded `../` or a leading
    // `/` would otherwise be joined verbatim and let the directory listing
    // escape the repository. Reject such requests before touching the disk.
    if !is_single_segment(repo.name_str()) || !is_contained_path(repo.path_str()) {
        warn!(
            "rejected request outside of repository: name <{:?}>, path <{:?}>",
            repo.name_str(),
            repo.path_str()
        );
        return Err(StatusCode::NOT_FOUND);
    }

    let root = repositories_dir().join(repo.name_str());
    let full_path = root.join(repo.path_str());

    // A path that cannot be inspected (e.g. it does not exist) is handed to
    // the file resolver, which produces the appropriate response itself.
    let is_dir = tokio::fs::metadata(&full_path)
        .await
        .map(|m| m.is_dir())
        .unwrap_or(false);

    if is_dir {
        return match resolve_dir(root, full_path.clone()).await {
            Ok(resp) => Ok(resp),
            Err(err) => {
                warn!("failed to resolve_dir <{:?}>: {}", full_path, err);
                Err(StatusCode::INTERNAL_SERVER_ERROR)
            }
        };
    }

    match resolve_file(root, &repo).await {
        Ok(resp) => Ok(resp),
        Err(err) => {
            warn!("failed to resolve_file <{:?}>: {}", full_path, err);
            Err(StatusCode::INTERNAL_SERVER_ERROR)
        }
    }
}
#[instrument(skip(repo))]
async fn meta(Path(repo): Path<ResolveParams>) -> Result<Json<Meta>, StatusCode> {
let key = repo.dataset_key();
if let Some(resp) = resolve_meta(&key) {
return Ok(Json(resp));
}
Err(StatusCode::NOT_FOUND)
}

View File

@ -0,0 +1,156 @@
use std::{collections::HashMap, path::PathBuf, str::FromStr};
use anyhow::Result;
use axum::{
body::boxed,
http::{header, Request, Uri},
response::{IntoResponse, Response},
Json,
};
use hyper::Body;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use tabby_common::{config::Config, SourceFile, Tag};
use tower::ServiceExt;
use tower_http::services::ServeDir;
// Process-wide metadata index. It is populated by `load_meta()` the first
// time it is accessed and is not refreshed afterwards.
lazy_static! {
static ref META: HashMap<DatasetKey, Meta> = load_meta();
}
/// `Content-Type` header value sent with the directory listings produced by
/// `resolve_dir`.
const DIRECTORY_MIME_TYPE: &str = "application/vnd.directory+json";
/// Key of the `META` index: identifies one file of one repository.
#[derive(Hash, PartialEq, Eq, Debug)]
pub struct DatasetKey {
// Repository name: `RepositoryConfig::name()` when the index is built, the
// `name` request parameter when it is queried.
local_name: String,
// File path: `SourceFile::filepath` when the index is built, the `path`
// request parameter (or "" when absent) when it is queried.
rel_path: String,
}
/// Parameters deserialized from the request path of the repository endpoints.
#[derive(Deserialize, Debug)]
pub struct ResolveParams {
// Repository name; joined onto `repositories_dir()` by the `resolve` handler.
name: String,
// Path inside the repository; `None` when the request carried no `path`
// parameter.
path: Option<String>,
}
impl ResolveParams {
    /// Builds the key used to look this request up in the metadata index.
    pub fn dataset_key(&self) -> DatasetKey {
        let local_name = self.name_str().to_owned();
        let rel_path = self.path_str().to_owned();
        DatasetKey {
            local_name,
            rel_path,
        }
    }

    /// The repository name as a string slice.
    pub fn name_str(&self) -> &str {
        &self.name
    }

    /// The requested path as a string slice, or `""` when no path was given.
    pub fn path_str(&self) -> &str {
        match &self.path {
            Some(path) => path.as_str(),
            None => "",
        }
    }
}
/// JSON body of a directory listing.
#[derive(Serialize)]
struct ListDir {
// Paths of the directory's entries, relative to the repository root.
entries: Vec<String>,
}
/// Dataset metadata of one indexed file; returned as JSON by the `meta`
/// endpoint. Every field is copied from the corresponding `SourceFile` field.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Meta {
git_url: String,
filepath: String,
language: String,
max_line_length: usize,
avg_line_length: f32,
alphanum_fraction: f32,
tags: Vec<Tag>,
}
impl From<SourceFile> for Meta {
fn from(file: SourceFile) -> Self {
Self {
git_url: file.git_url,
filepath: file.filepath,
language: file.language,
max_line_length: file.max_line_length,
avg_line_length: file.avg_line_length,
alphanum_fraction: file.alphanum_fraction,
tags: file.tags,
}
}
}
/// TODO: implement auto reloading logic in future (so changes produced by tabby-scheduler command will be loaded)
fn load_meta() -> HashMap<DatasetKey, Meta> {
let mut dataset = HashMap::new();
let repo_conf = match Config::load() {
Ok(config) => config
.repositories
.into_iter()
.map(|repo| (repo.git_url.clone(), repo))
.collect::<HashMap<_, _>>(),
Err(_) => {
return dataset;
}
};
let iter = match SourceFile::all() {
Ok(all) => all,
Err(_) => {
return dataset;
}
};
for file in iter {
if let Some(name) = repo_conf.get(&file.git_url).map(|repo| repo.name()) {
let key = DatasetKey {
local_name: name,
rel_path: file.filepath.clone(),
};
dataset.insert(key, file.into());
}
}
dataset
}
/// Resolve a directory
///
/// Lists the entries of `full_path` and returns them as a JSON `ListDir` body
/// with the `DIRECTORY_MIME_TYPE` content type. Each entry is reported as its
/// path relative to `root`.
///
/// # Errors
///
/// Fails when the directory cannot be read or when an entry does not lie
/// under `root`.
pub async fn resolve_dir(root: PathBuf, full_path: PathBuf) -> Result<Response> {
    let mut read_dir = tokio::fs::read_dir(full_path).await?;
    let mut entries = vec![];

    while let Some(entry) = read_dir.next_entry().await? {
        let path = entry.path();
        let relative = path.strip_prefix(&root)?;
        // File names are not guaranteed to be valid UTF-8; convert lossily
        // instead of panicking on such an entry.
        entries.push(relative.to_string_lossy().into_owned());
    }

    let body = Json(ListDir { entries }).into_response();
    let resp = Response::builder()
        .header(header::CONTENT_TYPE, DIRECTORY_MIME_TYPE)
        .body(body.into_body())?;

    Ok(resp)
}
/// Percent-encodes `path` so that it can be parsed as the path of a request
/// URI, adding a leading `/` when it is missing.
///
/// ASCII letters, digits, `-`, `.`, `_`, `~` and `/` are kept as they are;
/// every other byte is written as `%XX`.
fn encode_uri_path(path: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(path.len() + 1);
    if !path.starts_with('/') {
        encoded.push('/');
    }
    for byte in path.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' | b'/' => {
                encoded.push(char::from(byte));
            }
            _ => {
                encoded.push('%');
                encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    encoded
}

/// Resolve a file
///
/// Serves the file at `repo`'s path below `root` by sending a synthetic
/// request through `ServeDir`.
///
/// # Errors
///
/// Fails when the request cannot be built or the service call fails.
pub async fn resolve_file(root: PathBuf, repo: &ResolveParams) -> Result<Response> {
    // The path was already percent-decoded when it was extracted from the
    // request, so it has to be encoded again: otherwise names containing
    // spaces fail to parse, and `?`, `#` or `%` would be misinterpreted.
    let uri = Uri::from_str(&encode_uri_path(repo.path_str()))?;
    let req = Request::builder().uri(uri).body(Body::empty())?;
    let resp = ServeDir::new(root).oneshot(req).await?;

    Ok(resp.map(boxed))
}
/// Returns a copy of the metadata stored for `key` in the `META` index, or
/// `None` when the index has no such entry.
pub fn resolve_meta(key: &DatasetKey) -> Option<Meta> {
    META.get(key).cloned()
}