From 7330d75de6b7faacd1443d0b1e46f43d00abf8f3 Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Sun, 29 Oct 2023 16:30:16 -0700
Subject: [PATCH] chore: clear cache when there's no active requests

---
 crates/llama-cpp-bindings/llama.cpp     | 2 +-
 crates/llama-cpp-bindings/src/engine.cc | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/llama-cpp-bindings/llama.cpp b/crates/llama-cpp-bindings/llama.cpp
index 638ff1a..f858db8 160000
--- a/crates/llama-cpp-bindings/llama.cpp
+++ b/crates/llama-cpp-bindings/llama.cpp
@@ -1 +1 @@
-Subproject commit 638ff1aba1fa200f0bdc0ee3709176ddd783a49d
+Subproject commit f858db8db3a98968ad3764c409e43e44c443079b
diff --git a/crates/llama-cpp-bindings/src/engine.cc b/crates/llama-cpp-bindings/src/engine.cc
index 9a93f36..375553a 100644
--- a/crates/llama-cpp-bindings/src/engine.cc
+++ b/crates/llama-cpp-bindings/src/engine.cc
@@ -100,6 +100,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
     stopped_requests_.clear();
 
     if (requests_.size() == 0) {
+      llama_kv_cache_clear(ctx);
       return {};
     }
 