feat: add model warmup logic (#693)

2023-11-02 16:07:32 -07:00 · 2023-11-02 16:07:32 -07:00 · 4c7eae584e
parent 0e4a2d2a12
commit 4c7eae584e
1 changed files with 16 additions and 0 deletions
--- a/crates/llama-cpp-bindings/src/engine.cc
+++ b/crates/llama-cpp-bindings/src/engine.cc
@@ -88,6 +88,22 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
    model_(std::move(model)),
    ctx_(std::move(ctx)) {
      batch_ = llama_batch_init(N_CTX * N_CONCURRENT_REQUESTS, 0, 1);
      // Warm up: run one throwaway decode over a small dummy batch, then discard
      // its effects, presumably so that first-use costs are paid at construction
      // time instead of on the first real request.
      {
        constexpr int kWarmupTokens = 16;

        // llama_decode() only processes the first `n_tokens` entries of the
        // batch. NOTE(review): llama_batch_init() is understood to leave this
        // at 0, in which case the warmup would decode nothing unless it is set.
        batch_.n_tokens = kWarmupTokens;
        for (int i = 0; i < kWarmupTokens; ++i) {
          batch_.token[i] = 0;
          batch_.pos[i] = i;
          // Each token belongs to exactly one sequence (id 0); the count must be
          // written per token, not only for slot 0.
          batch_.n_seq_id[i] = 1;
          batch_.seq_id[i][0] = 0;
          batch_.logits[i] = false;
        }

        // llama_decode() returns 0 on success; any non-zero value means the
        // warmup batch was not fully processed. Best-effort: log and continue.
        if (llama_decode(ctx_.get(), batch_) != 0) {
          fprintf(stderr, "%s: warmup failed\n", __func__);
        }

        // Drop whatever the dummy tokens left in the KV cache and return the
        // batch to its empty state so later use starts from a clean slate.
        llama_kv_cache_clear(ctx_.get());
        batch_.n_tokens = 0;
      }
  }
  ~TextInferenceEngineImpl() {