From d452488c4b325aef1a67845354ea0bb6dc15aa94 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Mon, 1 May 2023 15:06:06 +0800 Subject: [PATCH] Remove FLAGS_enable_meilisearch and FLAGS_rewrite_prompt_with_search_snippet (#122) --- Dockerfile | 6 -- tabby/admin/Home.py | 7 --- tabby/scripts/flags.sh | 2 - tabby/scripts/tabby.sh | 13 ----- tabby/server/backend/prompt_rewriter.py | 74 ------------------------- tabby/server/backend/triton.py | 16 +----- tabby/tasks/update_dataset.yaml | 12 ---- 7 files changed, 1 insertion(+), 129 deletions(-) delete mode 100644 tabby/server/backend/prompt_rewriter.py diff --git a/Dockerfile b/Dockerfile index 61d70c2..6d381d6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,12 +61,6 @@ RUN <= 3] - - # Remove tokens in language reserved_keywords. - tokens = set([x for x in tokens if x not in preset.reserved_keywords]) - - if len(tokens) > 3: - return " ".join(tokens) - else: - raise PromptRewriteFailed("Too few tokens extracted from prompt") - - def rewrite(self, preset: LanguagePreset, prompt: str) -> str: - if preset.reserved_keywords is None: - raise PromptRewriteFailed("Rewrite requires language keywords list") - - index = self.meili_client.index("dataset") - query = self.create_query(preset, prompt) - logger.debug("query: {}", query) - search_results = index.search( - query, - { - "limit": 3, - "attributesToCrop": ["content"], - "cropLength": 32, - "cropMarker": "", - "attributesToRetrieve": ["content"], - }, - ) - - if len(search_results["hits"]) == 0: - raise PromptRewriteFailed("No related snippets") - - def make_snippet(i, content): - content = content["_formatted"]["content"] - return f"== snippet {i+1} ==\n{content}" - - snippets = "\n".join( - [make_snippet(i, x) for i, x in enumerate(search_results["hits"])] - ) - prompt = f"""Given following relevant code snippet, generate code completion based on context. -{snippets} -== context == -{prompt}""" - logger.debug("prompt: {}", prompt) - return prompt - - def __call__(self, preset: LanguagePreset, prompt: str) -> str: - try: - return self.rewrite(preset, prompt) - except PromptRewriteFailed: - return prompt - - -class PromptRewriteFailed(Exception): - pass diff --git a/tabby/server/backend/triton.py b/tabby/server/backend/triton.py index 41aadd5..1bf1a19 100644 --- a/tabby/server/backend/triton.py +++ b/tabby/server/backend/triton.py @@ -1,4 +1,3 @@ -import os import time from typing import List @@ -9,13 +8,8 @@ from tritonclient.utils import InferenceServerException, np_to_triton_dtype from ..models import Choice, CompletionRequest, CompletionResponse from .language_presets import LanguagePresets -from .prompt_rewriter import PromptRewriter from .utils import random_completion_id, trim_with_stop_words -FLAGS_rewrite_prompt_with_search_snippet = os.environ.get( - "FLAGS_rewrite_prompt_with_search_snippet", None -) - class TritonService: def __init__( @@ -30,11 +24,6 @@ class TritonService: url=f"{host}:{port}", verbose=verbose ) - if FLAGS_rewrite_prompt_with_search_snippet: - self.rewriter = PromptRewriter() - else: - self.rewriter = None - def generate(self, data: CompletionRequest) -> List[Choice]: n = 1 np_type = np.uint32 @@ -44,10 +33,7 @@ class TritonService: if preset is None: return [] - if self.rewriter: - prompt = self.rewriter(preset, data.prompt) - else: - prompt = data.prompt + prompt = data.prompt input_start_ids = np.expand_dims(self.tokenizer.encode(prompt), 0) input_start_ids = np.repeat(input_start_ids, n, axis=0).astype(np_type) diff --git a/tabby/tasks/update_dataset.yaml b/tabby/tasks/update_dataset.yaml index f351f73..82159a4 100644 --- a/tabby/tasks/update_dataset.yaml +++ b/tabby/tasks/update_dataset.yaml @@ -7,7 +7,6 @@ env: - GIT_REPOSITORIES_DIR: "$GIT_REPOSITORIES_DIR" - DATASET_DIR: "$DATASET_DIR" - HOME: "$HOME" - - FLAGS_enable_meilisearch: "$FLAGS_enable_meilisearch" steps: - name: update repositories dir: $APP_DIR @@ -18,14 +17,3 @@ steps: command: python -m tabby.tools.build_dataset --project_dir=$GIT_REPOSITORIES_DIR --output_dir=$DATASET_DIR depends: - update repositories - - - name: refresh index - dir: $APP_DIR - preconditions: - - condition: "$FLAGS_enable_meilisearch" - expected: "1" - depends: - - generate dataset - command: | - curl -X DELETE 'http://localhost:8084/indexes/dataset/documents' - curl -X POST 'http://localhost:8084/indexes/dataset/documents?primaryKey=id' -H 'Content-Type: application/x-ndjson' --data-binary @$DATASET_DIR/dumps.json