fix: llama.cpp queuing logic (#741)

refactor-extract-code
Meng Zhang 2023-11-09 00:29:54 -08:00 committed by GitHub
parent cc5e257b74
commit 8c669dee8e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 6 additions and 1 deletion

View File

@ -58,6 +58,11 @@ impl LlamaServiceImpl {
stop_condition,
}) = self.next_request().await
{
// Drop canceled requests.
if tx.is_closed() {
continue;
}
let request_id = self.alloc_request_id();
self.requests
.insert(request_id, LlamaRunningRequest { tx, stop_condition });
@ -128,7 +133,7 @@ pub struct LlamaService {
impl LlamaService {
pub fn new(engine: UniquePtr<ffi::TextInferenceEngine>) -> Self {
let (tx, rx) = channel(20);
let (tx, rx) = channel(1);
start_llama_service_impl(engine, rx);
Self { tx }
}