From 16f47005dd61372a1790ce2c0662cb5a058bc062 Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Thu, 9 Nov 2023 00:20:41 -0800
Subject: [PATCH] fix: llama.cpp queuing logic

---
 crates/llama-cpp-bindings/src/llama.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/crates/llama-cpp-bindings/src/llama.rs b/crates/llama-cpp-bindings/src/llama.rs
index 975d4b1..67d9a51 100644
--- a/crates/llama-cpp-bindings/src/llama.rs
+++ b/crates/llama-cpp-bindings/src/llama.rs
@@ -58,6 +58,11 @@ impl LlamaServiceImpl {
             stop_condition,
         }) = self.next_request().await
         {
+            // Drop canceled requests.
+            if tx.is_closed() {
+                continue;
+            }
+
             let request_id = self.alloc_request_id();
             self.requests
                 .insert(request_id, LlamaRunningRequest { tx, stop_condition });
@@ -128,7 +133,7 @@ pub struct LlamaService {
 
 impl LlamaService {
     pub fn new(engine: UniquePtr) -> Self {
-        let (tx, rx) = channel(20);
+        let (tx, rx) = channel(1);
         start_llama_service_impl(engine, rx);
         Self { tx }
     }
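
As a side note, a minimal standalone sketch (not part of the patch above, and using made-up channels rather than the crate's LlamaService types) of the two tokio mpsc behaviors this fix appears to rely on: Sender::is_closed() becomes true once the matching receiver is dropped (a canceled request), and a channel(1) buffer makes producers wait rather than queue up stale work. It assumes the tokio crate with the "full" feature enabled.

// Illustrative sketch only; hypothetical channels, not the patch's code.
use tokio::sync::mpsc::channel;

#[tokio::main]
async fn main() {
    // A capacity-1 request channel applies backpressure: once one request is
    // buffered, further sends must wait until the service side consumes it.
    let (tx, mut rx) = channel::<u32>(1);
    tx.send(1).await.unwrap();
    assert!(tx.try_send(2).is_err()); // buffer full; `send` would have to await

    // `is_closed()` on a per-request response sender turns true once the
    // caller drops its receiver, which is how canceled requests are detected.
    let (resp_tx, resp_rx) = channel::<u32>(8);
    assert!(!resp_tx.is_closed());
    drop(resp_rx);
    assert!(resp_tx.is_closed());

    assert_eq!(rx.recv().await, Some(1)); // drain the buffered request
}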