From 9cd2accbaa5cfac76d8812238a70eca23eedf3c7 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Thu, 5 Oct 2023 13:29:41 +0800 Subject: [PATCH] feat: adjust code indexing logic (#510) --- Cargo.lock | 273 +++++++++++------- Cargo.toml | 2 +- crates/tabby-scheduler/src/index.rs | 39 ++- crates/tabby/src/serve/completions/prompt.rs | 12 +- .../main.py => scheduler/dataset.py} | 0 experimental/scheduler/search.py | 30 ++ 6 files changed, 243 insertions(+), 113 deletions(-) rename experimental/{dataset-viewer/main.py => scheduler/dataset.py} (100%) create mode 100644 experimental/scheduler/search.py diff --git a/Cargo.lock b/Cargo.lock index 99c0906..b9bcb22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,17 +28,6 @@ dependencies = [ "cpufeatures", ] -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom", - "once_cell", - "version_check", -] - [[package]] name = "ahash" version = "0.8.3" @@ -206,7 +195,7 @@ checksum = "f8175979259124331c1d7bf6586ee7e0da434155e4b2d48ec2c8386281d8df39" dependencies = [ "async-trait", "axum-core", - "bitflags", + "bitflags 1.3.2", "bytes", "futures-util", "http", @@ -322,6 +311,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + [[package]] name = "bitpacking" version = "0.8.4" @@ -385,7 +380,7 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8cead8ece0da6b744b2ad8ef9c58a4cdc7ef2921e60a6ddfb9eaaa86839b5fc5" dependencies = [ - "ahash 0.8.3", + "ahash", "async-trait", "cached_proc_macro", "cached_proc_macro_types", @@ -508,7 +503,7 @@ checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990" dependencies = [ "anstream", "anstyle", - "bitflags", + "bitflags 1.3.2", "clap_lex", "strsim 0.10.0", ] @@ -556,15 +551,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" -[[package]] -name = "combine" -version = "4.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" -dependencies = [ - "memchr", -] - [[package]] name = "console" version = "0.15.7" @@ -970,37 +956,12 @@ dependencies = [ "cc", ] -[[package]] -name = "fail" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c" -dependencies = [ - "log", - "once_cell", - "rand", -] - [[package]] name = "fastdivide" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04" -[[package]] -name = "fastfield_codecs" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374a3a53c1bd5fb31b10084229290eafb0a05f260ec90f1f726afffda4877a8a" -dependencies = [ - "fastdivide", - "itertools 0.10.5", - "log", - "ownedbytes", - "tantivy-bitpacker", - "tantivy-common", -] - [[package]] name = "fastrand" version = "1.9.0" @@ -1097,6 +1058,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "fs4" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eeb4ed9e12f43b7fa0baae3f9cdda28352770132ef2e09a23760c29cae8bd47" +dependencies = [ + "rustix 0.38.17", + "windows-sys 0.48.0", +] + [[package]] name = "futures" version = "0.3.28" @@ -1256,9 +1227,6 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash 0.7.6", -] [[package]] name = "hashbrown" @@ -1266,7 +1234,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" dependencies = [ - "ahash 0.8.3", + "ahash", "allocator-api2", ] @@ -1540,7 +1508,7 @@ checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ "hermit-abi 0.3.1", "io-lifetimes", - "rustix", + "rustix 0.37.19", "windows-sys 0.48.0", ] @@ -1571,6 +1539,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.6" @@ -1620,9 +1597,9 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" [[package]] name = "libc" -version = "0.2.144" +version = "0.2.148" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" [[package]] name = "libloading" @@ -1649,6 +1626,12 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" +[[package]] +name = "linux-raw-sys" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3852614a3bd9ca9804678ba6be5e3b8ce76dfc902cae004e3e0c44051b6e88db" + [[package]] name = "llama-cpp-bindings" version = "0.2.0" @@ -1701,18 +1684,18 @@ dependencies = [ [[package]] name = "lru" -version = "0.7.8" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a" +checksum = "a4a83fb7698b3643a0e34f9ae6f2e8f0178c0fd42f8b59d493aa271ff3a5bf21" dependencies = [ - "hashbrown 0.12.3", + "hashbrown 0.14.0", ] [[package]] name = "lz4_flex" -version = "0.9.5" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a8cbbb2831780bc3b9c15a41f5b49222ef756b6730a95f3decfdd15903eb5a3" +checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8" [[package]] name = "macro_rules_attribute" @@ -1772,9 +1755,9 @@ checksum = "5486aed0026218e61b8a01d5fbd5a0a134649abb71a0e53b7bc088529dced86e" [[package]] name = "memmap2" -version = "0.5.10" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6" dependencies = [ "libc", ] @@ -1886,12 +1869,9 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" [[package]] name = "murmurhash32" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a" -dependencies = [ - "byteorder", -] +checksum = "d9380db4c04d219ac5c51d14996bbf2c2e9a15229771b53f8671eb6c83cf44df" [[package]] name = "native-tls" @@ -1995,7 +1975,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7cd21b9f5a1cce3c3515c9ffa85f5c7443e07162dae0ccf4339bb7ca38ad3454" dependencies = [ - "bitflags", + "bitflags 1.3.2", "libloading", "nvml-wrapper-sys", "static_assertions", @@ -2042,7 +2022,7 @@ version = "6.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" dependencies = [ - "bitflags", + "bitflags 1.3.2", "libc", "once_cell", "onig_sys", @@ -2064,7 +2044,7 @@ version = "0.10.52" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56" dependencies = [ - "bitflags", + "bitflags 1.3.2", "cfg-if", "foreign-types", "libc", @@ -2190,9 +2170,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" -version = "0.4.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e957eaa64a299f39755416e5b3128c505e9d63a91d0453771ad2ccd3907f8db" +checksum = "6e8a72b918ae8198abb3a18c190288123e1d442b6b9a7d709305fd194688b4b7" dependencies = [ "stable_deref_trait", ] @@ -2499,7 +2479,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -2508,7 +2488,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] @@ -2728,11 +2708,24 @@ version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", - "linux-raw-sys", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", +] + +[[package]] +name = "rustix" +version = "0.38.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7" +dependencies = [ + "bitflags 2.4.0", + "errno", + "libc", + "linux-raw-sys 0.4.8", "windows-sys 0.48.0", ] @@ -2790,7 +2783,7 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8" dependencies = [ - "bitflags", + "bitflags 1.3.2", "core-foundation", "core-foundation-sys", "libc", @@ -2946,6 +2939,15 @@ dependencies = [ "libc", ] +[[package]] +name = "sketches-ddsketch" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68a406c1882ed7f29cd5e248c9848a80e7cb6ae0fea82346d2746f2f941c07e1" +dependencies = [ + "serde", +] + [[package]] name = "slab" version = "0.4.8" @@ -3202,26 +3204,24 @@ dependencies = [ [[package]] name = "tantivy" -version = "0.19.2" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb26a6b22c84d8be41d99a14016d6f04d30d8d31a2ea411a8ab553af5cc490d" +checksum = "c1d4675fed6fe2218ce11445374e181e864a8ffd0f28e7e0591ccfc38cd000ae" dependencies = [ - "aho-corasick 0.7.20", + "aho-corasick 1.0.1", "arc-swap", "async-trait", - "base64 0.13.1", + "base64 0.21.2", "bitpacking", "byteorder", "census", "crc32fast", "crossbeam-channel", "downcast-rs", - "fail", "fastdivide", - "fastfield_codecs", - "fs2", + "fs4", "htmlescape", - "itertools 0.10.5", + "itertools 0.11.0", "levenshtein_automata", "log", "lru", @@ -3232,19 +3232,21 @@ dependencies = [ "num_cpus", "once_cell", "oneshot", - "ownedbytes", "rayon", "regex", "rust-stemmers", "rustc-hash", "serde", "serde_json", + "sketches-ddsketch", "smallvec", - "stable_deref_trait", "tantivy-bitpacker", + "tantivy-columnar", "tantivy-common", "tantivy-fst", "tantivy-query-grammar", + "tantivy-stacker", + "tantivy-tokenizer-api", "tempfile", "thiserror", "time 0.3.26", @@ -3254,18 +3256,40 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e71a0c95b82d4292b097a09b989a6380d28c3a86800c841a2d03bae1fc8b9fa6" +checksum = "cecb164321482301f514dd582264fa67f70da2d7eb01872ccd71e35e0d96655a" +dependencies = [ + "bitpacking", +] + +[[package]] +name = "tantivy-columnar" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d85f8019af9a78b3118c11298b36ffd21c2314bd76bbcd9d12e00124cbb7e70" +dependencies = [ + "fastdivide", + "fnv", + "itertools 0.11.0", + "serde", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-sstable", + "tantivy-stacker", +] [[package]] name = "tantivy-common" -version = "0.4.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14fef4182bb60df9a4b92cd8ecab39ba2e50a05542934af17eef1f49660705cb" +checksum = "af4a3a975e604a2aba6b1106a04505e1e7a025e6def477fab6e410b4126471e1" dependencies = [ + "async-trait", "byteorder", "ownedbytes", + "serde", + "time 0.3.26", ] [[package]] @@ -3281,13 +3305,41 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" -version = "0.19.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "343e3ada4c1c480953f6960f8a21ce9c76611480ffdd4f4e230fdddce0fc5331" +checksum = "1d39c5a03100ac10c96e0c8b07538e2ab8b17da56434ab348309b31f23fada77" dependencies = [ - "combine", - "once_cell", - "regex", + "nom 7.1.3", +] + +[[package]] +name = "tantivy-sstable" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0c1bb43e5e8b8e05eb8009610344dbf285f06066c844032fbb3e546b3c71df" +dependencies = [ + "tantivy-common", + "tantivy-fst", + "zstd 0.12.4", +] + +[[package]] +name = "tantivy-stacker" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2c078595413f13f218cf6f97b23dcfd48936838f1d3d13a1016e05acd64ed6c" +dependencies = [ + "murmurhash32", + "tantivy-common", +] + +[[package]] +name = "tantivy-tokenizer-api" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "347b6fb212b26d3505d224f438e3c4b827ab8bd847fe9953ad5ac6b8f9443b66" +dependencies = [ + "serde", ] [[package]] @@ -3316,7 +3368,7 @@ dependencies = [ "cfg-if", "fastrand", "redox_syscall 0.3.5", - "rustix", + "rustix 0.37.19", "windows-sys 0.45.0", ] @@ -3642,7 +3694,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" dependencies = [ - "bitflags", + "bitflags 1.3.2", "bytes", "futures-core", "futures-util", @@ -3661,7 +3713,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d1d42a9b3f3ec46ba828e8d376aec14592ea199f70a06a548587ecd1c4ab658" dependencies = [ - "bitflags", + "bitflags 1.3.2", "bytes", "futures-core", "futures-util", @@ -4462,7 +4514,7 @@ dependencies = [ "pbkdf2", "sha1", "time 0.3.26", - "zstd", + "zstd 0.11.2+zstd.1.5.2", ] [[package]] @@ -4471,7 +4523,16 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe", + "zstd-safe 5.0.2+zstd.1.5.2", +] + +[[package]] +name = "zstd" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe 6.0.6", ] [[package]] @@ -4484,6 +4545,16 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "6.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +dependencies = [ + "libc", + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.8+zstd.1.5.5" diff --git a/Cargo.toml b/Cargo.toml index d8ea93c..ce22d29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ tracing = "0.1" tracing-subscriber = "0.3" anyhow = "1.0.71" serde-jsonlines = "0.4.0" -tantivy = "0.19.2" +tantivy = "0.21.0" async-trait = "0.1.72" reqwest = { version = "0.11.18" } derive_builder = "0.12.0" diff --git a/crates/tabby-scheduler/src/index.rs b/crates/tabby-scheduler/src/index.rs index 8477760..4719fe5 100644 --- a/crates/tabby-scheduler/src/index.rs +++ b/crates/tabby-scheduler/src/index.rs @@ -5,29 +5,54 @@ use tabby_common::{config::Config, path::index_dir, SourceFile}; use tantivy::{ directory::MmapDirectory, doc, - schema::{Schema, STORED, STRING, TEXT}, + schema::{Schema, TextFieldIndexing, TextOptions, STORED, STRING}, + tokenizer::{RegexTokenizer, RemoveLongFilter, TextAnalyzer}, Index, }; +// Magic numbers +static MAX_LINE_LENGTH_THRESHOLD: usize = 300; +static AVG_LINE_LENGTH_THRESHOLD: f32 = 150f32; + pub fn index_repositories(_config: &Config) -> Result<()> { let mut builder = Schema::builder(); + let code_indexing_options = TextFieldIndexing::default() + .set_tokenizer("code") + .set_index_option(tantivy::schema::IndexRecordOption::WithFreqsAndPositions); + let code_options = TextOptions::default() + .set_indexing_options(code_indexing_options) + .set_stored(); + let field_git_url = builder.add_text_field("git_url", STRING | STORED); let field_filepath = builder.add_text_field("filepath", STRING | STORED); let field_language = builder.add_text_field("language", STRING | STORED); let field_name = builder.add_text_field("name", STRING | STORED); let field_kind = builder.add_text_field("kind", STRING | STORED); - let field_body = builder.add_text_field("body", TEXT | STORED); + let field_body = builder.add_text_field("body", code_options); let schema = builder.build(); fs::create_dir_all(index_dir())?; let directory = MmapDirectory::open(index_dir())?; let index = Index::open_or_create(directory, schema)?; + let code_tokenizer = TextAnalyzer::builder(RegexTokenizer::new(r"(?:\w*)").unwrap()) + .filter(RemoveLongFilter::limit(40)) + .build(); + + index.tokenizers().register("code", code_tokenizer); let mut writer = index.writer(10_000_000)?; writer.delete_all_documents()?; for file in SourceFile::all()? { + if file.max_line_length > MAX_LINE_LENGTH_THRESHOLD { + continue; + } + + if file.avg_line_length > AVG_LINE_LENGTH_THRESHOLD { + continue; + } + for doc in from_source_file(file) { writer.add_document(doc!( field_git_url => doc.git_url, @@ -141,8 +166,18 @@ mod tests { assert_eq!(docs[0].name, "ConstantLengthDataset"); assert_eq!(docs[0].kind, "class"); + assert!( + docs[0].body.starts_with("class ConstantLengthDataset"), + "body: {:?}", + docs[0].body + ); assert_eq!(docs[1].name, "__init__"); assert_eq!(docs[1].kind, "function"); + assert!( + docs[1].body.starts_with("def __init__"), + "body: {:?}", + docs[1].body + ); } } diff --git a/crates/tabby/src/serve/completions/prompt.rs b/crates/tabby/src/serve/completions/prompt.rs index 998bff2..0380317 100644 --- a/crates/tabby/src/serve/completions/prompt.rs +++ b/crates/tabby/src/serve/completions/prompt.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use anyhow::{anyhow, Result}; +use anyhow::Result; use lazy_static::lazy_static; use strfmt::strfmt; use tabby_common::path::index_dir; @@ -186,14 +186,8 @@ impl IndexState { .reader_builder() .reload_policy(ReloadPolicy::OnCommit) .try_into()?; - let field_name = index - .schema() - .get_field("name") - .ok_or(anyhow!("Index doesn't have required field"))?; - let field_body = index - .schema() - .get_field("body") - .ok_or(anyhow!("Index doesn't have required field"))?; + let field_name = index.schema().get_field("name")?; + let field_body = index.schema().get_field("body")?; let query_parser = QueryParser::for_index(&index, vec![field_body]); Ok(Self { searcher: reader.searcher(), diff --git a/experimental/dataset-viewer/main.py b/experimental/scheduler/dataset.py similarity index 100% rename from experimental/dataset-viewer/main.py rename to experimental/scheduler/dataset.py diff --git a/experimental/scheduler/search.py b/experimental/scheduler/search.py new file mode 100644 index 0000000..dbc0ca3 --- /dev/null +++ b/experimental/scheduler/search.py @@ -0,0 +1,30 @@ +import requests +import streamlit as st +from typing import NamedTuple + +class Doc(NamedTuple): + name: str + body: str + score: float + + @staticmethod + def from_json(json: dict): + doc = json["doc"] + return Doc( + name=doc["name"][0], + body=doc["body"][0], + score=json["score"] + ) + +# force wide mode +st.set_page_config(layout="wide") + +query = st.text_input("Query") + +if query: + r = requests.get("http://localhost:3000/api", params=dict(q=query)) + hits = r.json()["hits"] + for x in hits: + doc = Doc.from_json(x) + st.write(doc.name + " : " + str(doc.score)) + st.code(doc.body) \ No newline at end of file