diff --git a/Cargo.lock b/Cargo.lock
index b23d509adfc..8e4dc9bf8ea 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4704,6 +4704,8 @@ dependencies = [
  "tokio",
  "tokio-rusqlite",
  "tokio-tungstenite",
+ "tower",
+ "tower-http 0.4.0",
  "tracing",
  "unicase",
  "uuid 1.4.1",
@@ -5315,10 +5317,16 @@ dependencies = [
  "http",
  "http-body",
  "http-range-header",
+ "httpdate",
+ "mime",
+ "mime_guess",
+ "percent-encoding",
  "pin-project-lite",
  "tokio",
+ "tokio-util",
  "tower-layer",
  "tower-service",
+ "tracing",
 ]
 
 [[package]]
diff --git a/crates/tabby-common/src/config.rs b/crates/tabby-common/src/config.rs
index e344f5324e7..edabae1c5a9 100644
--- a/crates/tabby-common/src/config.rs
+++ b/crates/tabby-common/src/config.rs
@@ -50,7 +50,7 @@ impl RepositoryConfig {
             let path = self.git_url.strip_prefix("file://").unwrap();
             path.into()
         } else {
-            repositories_dir().join(filenamify(&self.git_url))
+            repositories_dir().join(to_filename(&self.git_url))
         }
     }
 
@@ -73,9 +73,15 @@ impl Default for ServerConfig {
     }
 }
 
+/// Converts `s` (for example a git URL) into a string usable as a file name
+/// by delegating to `filenamify`.
+pub fn to_filename<S: AsRef<str>>(s: S) -> String {
+    filenamify(s)
+}
+
 #[cfg(test)]
 mod tests {
-    use super::{Config, RepositoryConfig};
+    use super::{to_filename, Config, RepositoryConfig};
 
     #[test]
     fn it_parses_empty_config() {
@@ -96,4 +102,10 @@ mod tests {
         };
         assert!(!repo.is_local_dir());
     }
+
+    #[test]
+    fn test_to_filename() {
+        let url = "https://github.com/TabbyML/tabby.git".to_string();
+        assert_eq!(to_filename(url), "https_github.com_TabbyML_tabby.git");
+    }
 }
diff --git a/crates/tabby-common/src/lib.rs b/crates/tabby-common/src/lib.rs
index ed90ff47683..8e0141cfc83 100644
--- a/crates/tabby-common/src/lib.rs
+++ b/crates/tabby-common/src/lib.rs
@@ -13,14 +13,17 @@ use std::{
     path::PathBuf,
 };
 
+pub use config::to_filename;
 use path::dataset_dir;
 use serde::{Deserialize, Serialize};
 use serde_jsonlines::JsonLinesReader;
 
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct SourceFile {
     pub git_url: String,
     pub filepath: String,
+    #[serde(skip_serializing_if = "String::is_empty")]
+    #[serde(default)]
     pub content: String,
     pub language: String,
     pub max_line_length: usize,
@@ -45,7 +48,7 @@ impl SourceFile {
     }
 }
 
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct Tag {
     pub range: Range,
     pub name_range: Range,
diff --git a/ee/tabby-webserver/Cargo.toml b/ee/tabby-webserver/Cargo.toml
index fdb4ecf1b71..71a05e9ec37 100644
--- a/ee/tabby-webserver/Cargo.toml
+++ b/ee/tabby-webserver/Cargo.toml
@@ -29,6 +29,8 @@ thiserror.workspace = true
 tokio = { workspace = true, features = ["fs"] }
 tokio-rusqlite = "0.4.0"
 tokio-tungstenite = "0.20.1"
+tower = { version = "0.4", features = ["util"] }
+tower-http = { version = "0.4.0", features = ["fs", "trace"] }
 tracing.workspace = true
 unicase = "2.7.0"
 
diff --git a/ee/tabby-webserver/docs/api_spec.md b/ee/tabby-webserver/docs/api_spec.md
new file mode 100644
index 00000000000..531a327fc22
--- /dev/null
+++ b/ee/tabby-webserver/docs/api_spec.md
@@ -0,0 +1,143 @@
+# API Specs
+
+## Repository API: `/repositories`
+
+### Resolve
+
+Get file or directory content from local repositories.
+
+**URL:** `/repositories/{name}/resolve/{path}`
+
+**Method:** `GET`
+
+**Request examples:**
+
+- Get directory content
+
+```shell
+curl --request GET \
+  --url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/resolve/
+
+curl --request GET \
+  --url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/resolve/ee/tabby-webserver/
+```
+
+- Get file content
+
+```shell
+curl --request GET \
+  --url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/resolve/package.json
+
+curl --request GET \
+  --url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/resolve/ee/tabby-webserver/src/api.rs
+```
+
+**Response examples:**
+
+- A directory query returns a list of strings, each of which represents an entry under that directory.
The `Content-Type` for a directory query is `application/vnd.directory+json`.
+
+For `/repositories/https_github.com_TabbyML_tabby.git/resolve/ee/tabby-webserver/`, the response is:
+
+```json
+{
+  "entries": [
+    "ee/tabby-webserver/src",
+    "ee/tabby-webserver/ui",
+    "ee/tabby-webserver/examples",
+    "ee/tabby-webserver/Cargo.toml",
+    "ee/tabby-webserver/graphql"
+  ]
+}
+```
+
+- A file query returns the file content; the `Content-Type` is guessed from the file extension.
+
+For the request `/repositories/https_github.com_TabbyML_tabby.git/resolve/package.json`, the content type is `application/json`, and the response is:
+
+```json
+{
+  "private": true,
+  "workspaces": [
+    "clients/tabby-agent",
+    "clients/vscode",
+    "clients/vim",
+    "clients/intellij"
+  ],
+  "engines": {
+    "node": ">=18"
+  }
+}
+```
+
+For the request `/repositories/https_github.com_TabbyML_tabby.git/resolve/ee/tabby-webserver/src/api.rs`, the content type is `text/x-rust`, and the response is:
+
+```text
+use async_trait::async_trait;
+use juniper::{GraphQLEnum, GraphQLObject};
+use serde::{Deserialize, Serialize};
+use tabby_common::api::{
+    code::{CodeSearch, CodeSearchError, SearchResponse},
+    event::RawEventLogger,
+};
+use thiserror::Error;
+use tokio_tungstenite::connect_async;
+
+use crate::websocket::WebSocketTransport;
+
+#[derive(GraphQLEnum, Serialize, Deserialize, Clone, Debug)]
+pub enum WorkerKind {
+    Completion,
+    Chat,
+}
+
+......omit......
+```
+
+### Meta
+
+Get the dataset entry of an indexed file in the repository.
+
+**URL:** `/repositories/{name}/meta/{path}`
+
+**Method:** `GET`
+
+**Request example:**
+
+```shell
+curl --request GET \
+  --url http://localhost:8080/repositories/https_github.com_TabbyML_tabby.git/meta/ee/tabby-webserver/src/lib.rs
+```
+
+**Response example:**
+
+The `Content-Type` for a successful response is always `application/json`.
+
+```json
+{
+  "git_url": "https://github.com/TabbyML/tabby.git",
+  "filepath": "ee/tabby-webserver/src/lib.rs",
+  "language": "rust",
+  "max_line_length": 88,
+  "avg_line_length": 26.340782,
+  "alphanum_fraction": 0.56416017,
+  "tags": [
+    {
+      "range": {
+        "start": 0,
+        "end": 12
+      },
+      "name_range": {
+        "start": 8,
+        "end": 11
+      },
+      "line_range": {
+        "start": 0,
+        "end": 12
+      },
+      "is_definition": true,
+      "syntax_type_name": "module"
+    },
+    ......omit......
+  ]
+}
+```
diff --git a/ee/tabby-webserver/src/lib.rs b/ee/tabby-webserver/src/lib.rs
index e93f3382a9e..0d809ad7250 100644
--- a/ee/tabby-webserver/src/lib.rs
+++ b/ee/tabby-webserver/src/lib.rs
@@ -11,6 +11,7 @@ use tracing::{error, warn};
 use websocket::WebSocketTransport;
 
 mod db;
+mod repositories;
 mod server;
 mod ui;
 mod websocket;
@@ -31,6 +32,8 @@ use schema::Schema;
 use server::ServerContext;
 use tarpc::server::{BaseChannel, Channel};
 
+use crate::repositories::repo::load_dataset;
+
 pub async fn attach_webserver(
     api: Router,
     ui: Router,
@@ -49,7 +52,8 @@ pub async fn attach_webserver(
         )
         .route("/graphql", routing::get(playground("/graphql", None)))
         .layer(Extension(schema))
-        .route("/hub", routing::get(ws_handler).with_state(ctx));
+        .route("/hub", routing::get(ws_handler).with_state(ctx))
+        .nest("/repositories", repositories_routers().await);
 
     let ui = ui
         .route("/graphiql", routing::get(graphiql("/graphql", None)))
@@ -58,6 +62,21 @@ pub async fn attach_webserver(
     (api, ui)
 }
 
+/// Builds the `/repositories` router after loading the dataset read by the `meta` endpoint.
+async fn repositories_routers() -> Router {
+    // A dataset that cannot be loaded only degrades `meta` (it answers 404),
+    // so log the failure instead of aborting the whole server.
+    if let Err(err) = load_dataset().await {
+        error!("load dataset: {}", err);
+    }
+
+    Router::new()
+        .route("/:name/resolve/", routing::get(repositories::resolve))
+        .route("/:name/resolve/*path", routing::get(repositories::resolve))
+        .route("/:name/meta/", routing::get(repositories::meta))
+        .route("/:name/meta/*path", routing::get(repositories::meta))
+}
+
 async fn distributed_tabby_layer(
     State(ws): State>,
     request: Request,
diff --git a/ee/tabby-webserver/src/repositories.rs b/ee/tabby-webserver/src/repositories.rs
new file mode 100644
index 00000000000..dca71662473
--- /dev/null
+++ b/ee/tabby-webserver/src/repositories.rs
@@ -0,0 +1,67 @@
+pub(crate) mod repo;
+
+use anyhow::Result;
+use axum::{extract::Path, http::StatusCode, response::Response, Json};
+use tabby_common::{path::repositories_dir, SourceFile};
+use tracing::{debug, instrument, warn};
+
+use crate::repositories::repo::{resolve_dir, resolve_file, Repository, DATASET};
+
+/// Returns `true` when `path` consists only of plain name components, i.e. it
+/// has no `..`, `.`, root or prefix component that could leave a base directory.
+fn stays_inside(path: &str) -> bool {
+    std::path::Path::new(path)
+        .components()
+        .all(|c| matches!(c, std::path::Component::Normal(_)))
+}
+
+/// Serves a directory listing or a file from a local repository.
+///
+/// The repository root is `repositories_dir()` joined with the requested name.
+/// Requests whose name or path would leave that root are answered with
+/// `404 Not Found`; failures while building the response are logged and
+/// answered with `500 Internal Server Error`.
+#[instrument(skip(repo))]
+pub async fn resolve(Path(repo): Path<Repository>) -> Result<Response, StatusCode> {
+    debug!("repo: {:?}", repo);
+
+    // The name and path come straight from the request URL, so refuse anything
+    // that could point outside the repositories directory.
+    let rel_path = repo.path_str().trim_start_matches('/');
+    if !stays_inside(repo.name_str()) || !stays_inside(rel_path) {
+        return Err(StatusCode::NOT_FOUND);
+    }
+
+    let root = repositories_dir().join(repo.name_str());
+    let full_path = root.join(rel_path);
+    let is_dir = tokio::fs::metadata(&full_path)
+        .await
+        .map(|m| m.is_dir())
+        .unwrap_or(false);
+
+    let result = if is_dir {
+        resolve_dir(root, full_path).await
+    } else {
+        resolve_file(root, &repo).await
+    };
+
+    result.map_err(|err| {
+        warn!("{}", err);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })
+}
+
+/// Returns the dataset entry recorded for a file of a repository.
+///
+/// Responds with `404 Not Found` when the dataset is not loaded or has no
+/// entry for the requested repository and path.
+#[instrument(skip(repo))]
+pub async fn meta(Path(repo): Path<Repository>) -> Result<Json<SourceFile>, StatusCode> {
+    debug!("repo: {:?}", repo);
+    let key = repo.dataset_key();
+    DATASET
+        .get()
+        .and_then(|dataset| dataset.get(&key))
+        .cloned()
+        .map(Json)
+        .ok_or(StatusCode::NOT_FOUND)
+}
diff --git a/ee/tabby-webserver/src/repositories/repo.rs b/ee/tabby-webserver/src/repositories/repo.rs
new file mode 100644
index 00000000000..bfe5eef32ed
--- /dev/null
+++ b/ee/tabby-webserver/src/repositories/repo.rs
@@ -0,0 +1,145 @@
+use std::{collections::HashMap, path::PathBuf, str::FromStr};
+
+use anyhow::Result;
+use axum::{
+    body::boxed,
+    http::{header, Request, Uri},
+    response::{IntoResponse, Response},
+    Json,
+};
+use hyper::Body;
+use serde::{Deserialize, Serialize};
+use tabby_common::{to_filename, SourceFile};
+use tokio::sync::OnceCell;
+use tower::ServiceExt;
+use tower_http::services::ServeDir;
+use tracing::error;
+
+/// Dataset entries keyed by repository name and file path, set once by `load_dataset`.
+pub(crate) static DATASET: OnceCell<HashMap<DatasetKey, SourceFile>> = OnceCell::const_new();
+
+/// `Content-Type` used for directory listings.
+const MIME_VENDOR: &str = "application/vnd.directory+json";
+
+/// Key of an entry in `DATASET`.
+#[derive(Hash, PartialEq, Eq, Debug)]
+pub struct DatasetKey {
+    /// Repository name, i.e. the git URL passed through `to_filename`.
+    local_name: String,
+    /// File path as recorded in the dataset (`SourceFile::filepath`).
+    rel_path: String,
+}
+
+/// Path parameters (`:name` and the optional `*path`) of the repository routes.
+#[derive(Deserialize, Debug)]
+pub struct Repository {
+    name: String,
+    path: Option<String>,
+}
+
+impl Repository {
+    /// Builds the `DATASET` key for this request.
+    pub fn dataset_key(&self) -> DatasetKey {
+        DatasetKey {
+            local_name: self.name.clone(),
+            rel_path: self.path_str().to_string(),
+        }
+    }
+
+    /// Repository name taken from the URL.
+    pub fn name_str(&self) -> &str {
+        self.name.as_str()
+    }
+
+    /// Requested path, or `""` when the URL carries none.
+    pub fn path_str(&self) -> &str {
+        self.path.as_deref().unwrap_or("")
+    }
+}
+
+/// JSON body of a directory listing.
+#[derive(Serialize)]
+struct ListDir {
+    entries: Vec<String>,
+}
+
+/// Loads every `SourceFile` of the dataset into `DATASET`, with `content` emptied.
+///
+/// A failure to read the dataset is logged and leaves `DATASET` empty.
+///
+/// # Errors
+///
+/// Fails if the blocking task cannot be joined or if `DATASET` cannot be set
+/// (for example because it was already initialised).
+pub async fn load_dataset() -> Result<()> {
+    // `SourceFile::all()` depends on `std::io`, so it's blocking.
+    // We need to spawn a blocking task dedicated for such scenario.
+    let dataset = tokio::task::spawn_blocking(|| {
+        let mut dataset = HashMap::new();
+        let iter = match SourceFile::all() {
+            Ok(all) => all,
+            Err(err) => {
+                error!("load dataset: {}", err);
+                return dataset;
+            }
+        };
+        for mut file in iter {
+            let key = DatasetKey {
+                local_name: to_filename(file.git_url.as_str()),
+                rel_path: file.filepath.clone(),
+            };
+            // exclude content from response data
+            file.content = String::new();
+            dataset.insert(key, file);
+        }
+        dataset
+    })
+    .await?;
+
+    DATASET.set(dataset)?;
+    Ok(())
+}
+
+/// Lists the entries of `full_path` as paths relative to `root`.
+///
+/// # Errors
+///
+/// Fails if the directory cannot be read or an entry is not located under `root`.
+pub async fn resolve_dir(root: PathBuf, full_path: PathBuf) -> Result<Response> {
+    let mut read_dir = tokio::fs::read_dir(full_path).await?;
+    let mut entries = vec![];
+
+    while let Some(entry) = read_dir.next_entry().await? {
+        let path = entry.path();
+        // Lossy conversion: a non-UTF-8 file name must not panic the handler.
+        let path = path.strip_prefix(&root)?.to_string_lossy().into_owned();
+        entries.push(path);
+    }
+
+    let body = Json(ListDir { entries }).into_response();
+    let resp = Response::builder()
+        .header(header::CONTENT_TYPE, MIME_VENDOR)
+        .body(body.into_body())?;
+
+    Ok(resp)
+}
+
+/// Serves the file at `repo`'s path below `root` through `ServeDir`.
+///
+/// # Errors
+///
+/// Fails if the path cannot be turned into a request URI.
+pub async fn resolve_file(root: PathBuf, repo: &Repository) -> Result<Response> {
+    // The request URI needs a leading slash; add one when the path has none.
+    let path = repo.path_str();
+    let uri = if path.starts_with('/') {
+        Uri::from_str(path)?
+    } else {
+        Uri::from_str(&format!("/{}", path))?
+    };
+
+    let req = Request::builder().uri(uri).body(Body::empty())?;
+    let resp = ServeDir::new(root).oneshot(req).await?;
+
+    Ok(resp.map(boxed))
+}