feat: update llama.cpp to fetch latest starcoder support (#452)
* feat: bump llama.cpp to HEAD * fix: turn off add_bos by default (release-0.2)
parent
86e48afbe0
commit
97eeb6b926
|
|
@@ -1 +1 @@
|
|||
Subproject commit 06fc4020de0b92ee13407fdabca7870f53c75de5
|
||||
Subproject commit 31e7f3c20e1bacb522021e46788e24c045c108f6
|
||||
|
|
@@ -47,7 +47,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
|
|||
uint32_t start(const rust::Str prompt, size_t max_input_length) const override {
|
||||
auto* ctx = ctx_.get();
|
||||
llama_reset_timings(ctx);
|
||||
std::vector<llama_token> tokens_list = tokenize(ctx, std::string(prompt), max_input_length, /* add_bos = */ true);
|
||||
std::vector<llama_token> tokens_list = tokenize(ctx, std::string(prompt), max_input_length, /* add_bos = */ false);
|
||||
|
||||
for (size_t i = 0; i < tokens_list.size(); i += N_BATCH) {
|
||||
const size_t size = std::min(N_BATCH, tokens_list.size() - i);
|
||||
|
|
|
|||
Loading…
Reference in New Issue