From fbcab616d700fe5d52fd24a4afb2e6f05e8f4b32 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Mon, 20 Mar 2023 23:07:20 +0800 Subject: [PATCH] Update --- server/triton.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/triton.py b/server/triton.py index f854cdd..1fbb505 100644 --- a/server/triton.py +++ b/server/triton.py @@ -25,10 +25,12 @@ class TritonService: ) def generate(self, data: CompletionsRequest) -> List[Choice]: + # FIXME(meng): Make following vars configurable n = 1 np_type = np.uint32 max_tokens = 128 model_name = "fastertransformer" + stop_words = ["\n\n"] prompt = data.prompt input_start_ids = np.expand_dims(self.tokenizer.encode(prompt), 0) @@ -39,7 +41,6 @@ class TritonService: prompt_tokens: int = input_len[0][0] output_len = np.ones_like(input_len).astype(np_type) * max_tokens - stop_words = ["\n\n"] stop_word_list = np.repeat( to_word_list_format([stop_words], self.tokenizer), input_start_ids.shape[0],