fix: llama.cpp queuing logic (#741)

refactor-extract-code
Meng Zhang 2023-11-09 00:29:54 -08:00 committed by GitHub
parent cc5e257b74
commit 8c669dee8e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 6 additions and 1 deletion

View File

@ -58,6 +58,11 @@ impl LlamaServiceImpl {
stop_condition,
}) = self.next_request().await
{
// Drop canceled requests.
if tx.is_closed() {
continue;
}
let request_id = self.alloc_request_id();
self.requests
.insert(request_id, LlamaRunningRequest { tx, stop_condition });
@ -128,7 +133,7 @@ pub struct LlamaService {
impl LlamaService {
pub fn new(engine: UniquePtr<ffi::TextInferenceEngine>) -> Self {
let (tx, rx) = channel(20);
let (tx, rx) = channel(1);
start_llama_service_impl(engine, rx);
Self { tx }
}