diff --git a/crates/llama-cpp-bindings/llama.cpp b/crates/llama-cpp-bindings/llama.cpp index 638ff1a..f858db8 160000 --- a/crates/llama-cpp-bindings/llama.cpp +++ b/crates/llama-cpp-bindings/llama.cpp @@ -1 +1 @@ -Subproject commit 638ff1aba1fa200f0bdc0ee3709176ddd783a49d +Subproject commit f858db8db3a98968ad3764c409e43e44c443079b diff --git a/crates/llama-cpp-bindings/src/engine.cc b/crates/llama-cpp-bindings/src/engine.cc index 9a93f36..375553a 100644 --- a/crates/llama-cpp-bindings/src/engine.cc +++ b/crates/llama-cpp-bindings/src/engine.cc @@ -100,6 +100,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine { stopped_requests_.clear(); if (requests_.size() == 0) { + llama_kv_cache_clear(ctx); return {}; }