feat: add model warmup logic (#693)
parent: 0e4a2d2a12
commit: 4c7eae584e
|
|
@ -88,6 +88,22 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
|
|||
model_(std::move(model)),
      ctx_(std::move(ctx)) {
    batch_ = llama_batch_init(N_CTX * N_CONCURRENT_REQUESTS, 0, 1);

    // Warm up: run one short dummy decode so first-use initialization
    // (weight paging, KV-cache setup, kernel compilation) is paid here
    // instead of on the first real request.
    {
      constexpr int kWarmupTokens = 16;
      for (int i = 0; i < kWarmupTokens; ++i) {
        batch_.token[i] = 0;       // BOS-ish dummy token; content is irrelevant
        batch_.pos[i] = i;
        // FIX: was `batch_.n_seq_id[0] = 1;` — must be set per token, or
        // tokens 1..15 carry whatever llama_batch_init left in the array.
        batch_.n_seq_id[i] = 1;
        batch_.seq_id[i][0] = 0;   // all warmup tokens belong to sequence 0
        batch_.logits[i] = false;  // no logits needed for a warmup pass
      }
      // FIX: n_tokens was never set; llama_batch_init leaves it at 0, so the
      // decode below would process nothing and the warmup was a no-op.
      batch_.n_tokens = kWarmupTokens;

      // FIX: llama_decode returns 0 on success, non-zero on failure; the
      // original `if (!llama_decode(...))` reported failure on success and
      // swallowed real errors.
      if (llama_decode(ctx_.get(), batch_) != 0) {
        fprintf(stderr, "%s: warmup failed\n", __func__);
      }

      // Discard warmup state so real requests start from a clean KV cache.
      llama_kv_cache_clear(ctx_.get());
    }
  }
|
||||
|
||||
~TextInferenceEngineImpl() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue