From fcbc5edc55f59bf1c1e1a902117e82832cb77b40 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Thu, 22 Jun 2023 14:23:35 -0700 Subject: [PATCH] Revert "feat: add /experimental/search endpoint (#258)" (#260) This reverts commit 04980160e5070f9f090fccd56806d59bc5d60e4d. --- Cargo.lock | 179 ++++++++-------------------- Cargo.toml | 1 - crates/tabby-scheduler/Cargo.toml | 2 +- crates/tabby-scheduler/src/index.rs | 22 +--- crates/tabby/Cargo.toml | 2 +- crates/tabby/src/serve/mod.rs | 22 +--- crates/tabby/src/serve/search.rs | 142 ---------------------- 7 files changed, 58 insertions(+), 312 deletions(-) delete mode 100644 crates/tabby/src/serve/search.rs diff --git a/Cargo.lock b/Cargo.lock index 630eb22..fed0ccb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -30,11 +30,11 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.3" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" dependencies = [ - "cfg-if", + "getrandom", "once_cell", "version_check", ] @@ -711,7 +711,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" dependencies = [ "cfg-if", - "hashbrown 0.12.3", + "hashbrown", "lock_api", "once_cell", "parking_lot_core", @@ -862,6 +862,20 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04" +[[package]] +name = "fastfield_codecs" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "374a3a53c1bd5fb31b10084229290eafb0a05f260ec90f1f726afffda4877a8a" +dependencies = [ + "fastdivide", + "itertools 0.10.5", + "log", + "ownedbytes", + "tantivy-bitpacker", + "tantivy-common", +] + [[package]] name = "fastrand" version = "1.9.0" @@ -958,16 +972,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "fs4" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7672706608ecb74ab2e055c68327ffc25ae4cac1e12349204fd5fb0f3487cce2" -dependencies = [ - "rustix", - "windows-sys 0.48.0", -] - [[package]] name = "futures" version = "0.3.28" @@ -1127,12 +1131,6 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hashbrown" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ "ahash", ] @@ -1308,7 +1306,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown 0.12.3", + "hashbrown", "serde", ] @@ -1528,18 +1526,18 @@ dependencies = [ [[package]] name = "lru" -version = "0.10.0" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03f1160296536f10c833a82dca22267d5486734230d47bf00bf435885814ba1e" +checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a" dependencies = [ - "hashbrown 0.13.2", + "hashbrown", ] [[package]] name = "lz4_flex" -version = "0.10.0" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83" +checksum = "1a8cbbb2831780bc3b9c15a41f5b49222ef756b6730a95f3decfdd15903eb5a3" [[package]] name = "macro_rules_attribute" @@ -1599,9 +1597,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memmap2" -version = "0.6.2" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d28bba84adfe6646737845bc5ebbfa2c08424eb1c37e94a1fd2a82adb56a872" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" dependencies = [ "libc", ] @@ -1696,9 +1694,12 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" [[package]] name = "murmurhash32" -version = "0.3.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9380db4c04d219ac5c51d14996bbf2c2e9a15229771b53f8671eb6c83cf44df" +checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a" +dependencies = [ + "byteorder", +] [[package]] name = "native-tls" @@ -1956,9 +1957,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" -version = "0.5.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c718e498b20704d5fb5d51d07f414a22f61c19254c1708e117b93fd76860739c" +checksum = "8e957eaa64a299f39755416e5b3128c505e9d63a91d0453771ad2ccd3907f8db" dependencies = [ "stable_deref_trait", ] @@ -2660,15 +2661,6 @@ dependencies = [ "libc", ] -[[package]] -name = "sketches-ddsketch" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68a406c1882ed7f29cd5e248c9848a80e7cb6ae0fea82346d2746f2f941c07e1" -dependencies = [ - "serde", -] - [[package]] name = "slab" version = "0.4.8" @@ -2802,7 +2794,6 @@ dependencies = [ "tabby-common", "tabby-download", "tabby-scheduler", - "tantivy", "tokio", "tower", "tower-http 0.4.0", @@ -2864,14 +2855,14 @@ dependencies = [ [[package]] name = "tantivy" -version = "0.20.2" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aec540e9cebc88f523f67f596dee213e491f0c55961de013566f267a0c31f5e9" +checksum = "5bb26a6b22c84d8be41d99a14016d6f04d30d8d31a2ea411a8ab553af5cc490d" dependencies = [ - "aho-corasick 1.0.1", + "aho-corasick 0.7.20", "arc-swap", "async-trait", - "base64 0.21.2", + "base64 0.13.1", "bitpacking", "byteorder", "census", @@ -2880,7 +2871,8 @@ dependencies = [ "downcast-rs", "fail", "fastdivide", - "fs4", + "fastfield_codecs", + "fs2", "htmlescape", "itertools 0.10.5", "levenshtein_automata", @@ -2893,21 +2885,19 @@ dependencies = [ "num_cpus", "once_cell", "oneshot", + "ownedbytes", "rayon", "regex", "rust-stemmers", "rustc-hash", "serde", "serde_json", - "sketches-ddsketch", "smallvec", + "stable_deref_trait", "tantivy-bitpacker", - "tantivy-columnar", "tantivy-common", "tantivy-fst", "tantivy-query-grammar", - "tantivy-stacker", - "tantivy-tokenizer-api", "tempfile", "thiserror", "time 0.3.21", @@ -2917,40 +2907,18 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" -version = "0.4.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16099e96f0ede682084469b80d6909dc170aa2b11d2a45538b5b36b2a90090b9" -dependencies = [ - "bitpacking", -] - -[[package]] -name = "tantivy-columnar" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e32b024b26eab93eb8648faf08004356bf9d47376557ee4409f4b210163656" -dependencies = [ - "fastdivide", - "fnv", - "itertools 0.10.5", - "serde", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-sstable", - "tantivy-stacker", -] +checksum = "e71a0c95b82d4292b097a09b989a6380d28c3a86800c841a2d03bae1fc8b9fa6" [[package]] name = "tantivy-common" -version = "0.5.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7d12fdd6ec0f7e0962f129c03c696a85ec567734950cbb2b89af4a293ce342f" +checksum = "14fef4182bb60df9a4b92cd8ecab39ba2e50a05542934af17eef1f49660705cb" dependencies = [ - "async-trait", "byteorder", "ownedbytes", - "serde", - "time 0.3.21", ] [[package]] @@ -2966,45 +2934,15 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" -version = "0.20.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "106d8f78ad1da4f0fdd526a0760c326c0573510d4dedabeb1962d35a35879797" +checksum = "343e3ada4c1c480953f6960f8a21ce9c76611480ffdd4f4e230fdddce0fc5331" dependencies = [ "combine", "once_cell", "regex", ] -[[package]] -name = "tantivy-sstable" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eda34243d3ee64bd8f9ba74a3b0d05f4d07beff7767a727212e9b5a19c13dde7" -dependencies = [ - "tantivy-common", - "tantivy-fst", - "zstd 0.12.3+zstd.1.5.2", -] - -[[package]] -name = "tantivy-stacker" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b9e9470301b026ad3b95f79a791a2a3ee81f3ab16fbe412a9dd81ff834acf5" -dependencies = [ - "murmurhash32", - "tantivy-common", -] - -[[package]] -name = "tantivy-tokenizer-api" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bee3519354cbe6bfff4bf6be9cf3e2dfcd0a6ea748a42312fbf1242c0d66fc6" -dependencies = [ - "serde", -] - [[package]] name = "tar" version = "0.4.38" @@ -4144,7 +4082,7 @@ dependencies = [ "pbkdf2", "sha1", "time 0.3.21", - "zstd 0.11.2+zstd.1.5.2", + "zstd", ] [[package]] @@ -4153,16 +4091,7 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe 5.0.2+zstd.1.5.2", -] - -[[package]] -name = "zstd" -version = "0.12.3+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" -dependencies = [ - "zstd-safe 6.0.5+zstd.1.5.4", + "zstd-safe", ] [[package]] @@ -4175,16 +4104,6 @@ dependencies = [ "zstd-sys", ] -[[package]] -name = "zstd-safe" -version = "6.0.5+zstd.1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" -dependencies = [ - "libc", - "zstd-sys", -] - [[package]] name = "zstd-sys" version = "2.0.8+zstd.1.5.5" diff --git a/Cargo.toml b/Cargo.toml index 99228bb..28fa131 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,4 +24,3 @@ tracing = "0.1" tracing-subscriber = "0.3" anyhow = "1.0.71" serde-jsonlines = "0.4.0" -tantivy = "0.20.2" diff --git a/crates/tabby-scheduler/Cargo.toml b/crates/tabby-scheduler/Cargo.toml index 9707dc9..c72adc4 100644 --- a/crates/tabby-scheduler/Cargo.toml +++ b/crates/tabby-scheduler/Cargo.toml @@ -10,7 +10,7 @@ anyhow = { workspace = true } filenamify = "0.1.0" job_scheduler = "1.2.1" tabby-common = { path = "../tabby-common" } -tantivy = { workspace = true } +tantivy = "0.19.2" tracing = { workspace = true } tree-sitter-javascript = "0.20.0" tree-sitter-tags = "0.20.2" diff --git a/crates/tabby-scheduler/src/index.rs b/crates/tabby-scheduler/src/index.rs index a5953a8..4e2d65b 100644 --- a/crates/tabby-scheduler/src/index.rs +++ b/crates/tabby-scheduler/src/index.rs @@ -8,7 +8,6 @@ use tantivy::{ schema::{Schema, STORED, STRING, TEXT}, Index, }; -use tracing::info; pub fn index_repositories(_config: &Config) -> Result<()> { let mut builder = Schema::builder(); @@ -25,24 +24,15 @@ pub fn index_repositories(_config: &Config) -> Result<()> { writer.delete_all_documents()?; for doc in Document::all()? { - if is_valid_doc(&doc) { - writer.add_document(doc!( - git_url => doc.git_url, - filepath => doc.filepath, - content => doc.content, - language => doc.language, - ))?; - } else { - info!("Skip {} - {}", doc.git_url, doc.filepath); - } + writer.add_document(doc!( + git_url => doc.git_url, + filepath => doc.filepath, + content => doc.content, + language => doc.language, + ))?; } - info!("Finalize index..."); writer.commit()?; Ok(()) } - -fn is_valid_doc(x: &Document) -> bool { - x.max_line_length < 1000 && x.avg_line_length < 100.0 && x.alphanum_fraction > 0.25 -} diff --git a/crates/tabby/Cargo.toml b/crates/tabby/Cargo.toml index 1e3e486..be6ee1a 100644 --- a/crates/tabby/Cargo.toml +++ b/crates/tabby/Cargo.toml @@ -30,7 +30,7 @@ opentelemetry = { version = "0.18.0", features = ["rt-tokio"] } opentelemetry-otlp = "0.11.0" axum-tracing-opentelemetry = "0.10.0" tracing-opentelemetry = "0.18.0" -tantivy = { workspace = true } + [dependencies.uuid] version = "1.3.3" diff --git a/crates/tabby/src/serve/mod.rs b/crates/tabby/src/serve/mod.rs index dcbe698..3a2d482 100644 --- a/crates/tabby/src/serve/mod.rs +++ b/crates/tabby/src/serve/mod.rs @@ -1,7 +1,6 @@ mod completions; mod events; mod health; -mod search; use std::{ net::{Ipv4Addr, SocketAddr}, @@ -31,7 +30,7 @@ OpenAPI documentation for [tabby](https://github.com/TabbyML/tabby), a self-host (url = "https://playground.app.tabbyml.com/tabby", description = "Playground server"), (url = "http://localhost:8080", description = "Local server"), ), - paths(events::log_event, completions::completion, health::health, search::search), + paths(events::log_event, completions::completion, health::health), components(schemas( events::LogEventRequest, completions::CompletionRequest, @@ -39,8 +38,6 @@ OpenAPI documentation for [tabby](https://github.com/TabbyML/tabby), a self-host completions::Segments, completions::Choice, health::HealthState, - search::SearchResponse, - search::Document, )) )] struct ApiDoc; @@ -126,7 +123,6 @@ pub async fn main(args: &ServeArgs) { let app = Router::new() .merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", ApiDoc::openapi())) .nest("/v1", api_router(args)) - .nest("/experimental", experimental_router()) .fallback(fallback()); let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port)); @@ -153,22 +149,6 @@ fn api_router(args: &ServeArgs) -> Router { .layer(opentelemetry_tracing_layer()) } -fn experimental_router() -> Router { - let state = search::SearchState::new(); - - if let Some(state) = state { - Router::new() - .route( - "/search", - routing::get(search::search).with_state(Arc::new(state)), - ) - .layer(CorsLayer::permissive()) - .layer(opentelemetry_tracing_layer()) - } else { - Router::new() - } -} - fn fallback() -> routing::MethodRouter { routing::get(|| async { axum::response::Redirect::temporary("/swagger-ui") }) } diff --git a/crates/tabby/src/serve/search.rs b/crates/tabby/src/serve/search.rs deleted file mode 100644 index 63fd152..0000000 --- a/crates/tabby/src/serve/search.rs +++ /dev/null @@ -1,142 +0,0 @@ -use std::sync::Arc; - -use axum::{ - extract::{Query, State}, - Json, -}; -use hyper::StatusCode; -use serde::{Deserialize, Serialize}; -use tabby_common::path::index_dir; -use tantivy::{ - collector::TopDocs, - query::{QueryParser, QueryParserError}, - schema::Field, - DocAddress, Index, IndexReader, Score, TantivyError, -}; -use utoipa::{IntoParams, ToSchema}; - -pub struct SearchState { - reader: IndexReader, - query_parser: QueryParser, - git_url_field: Field, - language_field: Field, - content_field: Field, -} - -impl SearchState { - pub fn new() -> Option { - let index = Index::open_in_dir(index_dir()).ok()?; - let content_field = index.schema().get_field("content").ok()?; - let language_field = index.schema().get_field("language").ok()?; - let git_url_field = index.schema().get_field("git_url").ok()?; - - let query_parser = QueryParser::for_index(&index, vec![content_field]); - let state = SearchState { - reader: index.reader().ok()?, - query_parser, - content_field, - language_field, - git_url_field, - }; - - Some(state) - } -} - -#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)] -pub struct Document { - git_url: String, - language: String, - content: String, -} - -#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)] -pub struct SearchResponse { - docs: Vec, -} - -#[derive(Deserialize, IntoParams)] -pub struct SearchRequest { - #[param(example = "function")] - q: String, - #[param(example = "10")] - limit: Option, - #[param(example = "0")] - offset: Option, -} - -#[utoipa::path( - get, - path = "/experimental/search", - tag = "experimental", - params(SearchRequest), - responses( - (status = 200, description = "Success", body = SearchResponse, content_type = "application/json"), - (status = 501, description = "Not Implemented"), - ) -)] -pub async fn search( - State(state): State>, - params: Query, -) -> Result, StatusCode> { - let searcher = state.reader.searcher(); - - let query = state - .query_parser - .parse_query(¶ms.q) - .map_err(QueryParserError::status)?; - - let search_options = - TopDocs::with_limit(params.limit.unwrap_or(10)).and_offset(params.offset.unwrap_or(0)); - let top_docs: Vec<(Score, DocAddress)> = searcher - .search(&query, &search_options) - .map_err(TantivyError::status)?; - - let docs = top_docs - .iter() - .filter_map(|(_score, doc_address)| { - let Some(retrieved_doc) = searcher.doc(*doc_address).ok() else { - return None; - }; - - Some(Document { - git_url: retrieved_doc - .get_first(state.git_url_field) - .unwrap() - .as_text() - .unwrap() - .to_owned(), - language: retrieved_doc - .get_first(state.language_field) - .unwrap() - .as_text() - .unwrap() - .to_owned(), - content: retrieved_doc - .get_first(state.content_field) - .unwrap() - .as_text() - .unwrap() - .to_owned(), - }) - }) - .collect(); - - Ok(Json(SearchResponse { docs })) -} - -trait Handler { - fn status(self) -> StatusCode; -} - -impl Handler for TantivyError { - fn status(self) -> StatusCode { - StatusCode::INTERNAL_SERVER_ERROR - } -} - -impl Handler for QueryParserError { - fn status(self) -> StatusCode { - StatusCode::INTERNAL_SERVER_ERROR - } -}