feat: add model warmup logic (#693)

release-notes-05
Meng Zhang 2023-11-02 16:07:32 -07:00 committed by GitHub
parent 0e4a2d2a12
commit 4c7eae584e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 16 additions and 0 deletions

View File

@@ -88,6 +88,22 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
model_(std::move(model)),
ctx_(std::move(ctx)) {
batch_ = llama_batch_init(N_CTX * N_CONCURRENT_REQUESTS, 0, 1);
// Warm up: decode a small batch of dummy tokens once, then discard the
// KV-cache entries that decode produced so real requests start clean.
{
  constexpr int kWarmupTokens = 16;

  // llama_batch_init() leaves n_tokens at 0; it has to be set explicitly,
  // otherwise the decode below is handed an empty batch and nothing runs.
  batch_.n_tokens = kWarmupTokens;
  for (int i = 0; i < kWarmupTokens; ++i) {
    batch_.token[i] = 0;
    batch_.pos[i] = i;
    batch_.n_seq_id[i] = 1;    // each token belongs to exactly one sequence
    batch_.seq_id[i][0] = 0;
    batch_.logits[i] = false;  // warm-up output is never read
  }

  // llama_decode() returns 0 on success; any non-zero value is a warning or
  // an error, so that is the case to report.
  if (llama_decode(ctx_.get(), batch_) != 0) {
    fprintf(stderr, "%s: warmup failed\n", __func__);
  }

  llama_kv_cache_clear(ctx_.get());

  // Leave the shared batch empty again, as llama_batch_init() produced it.
  batch_.n_tokens = 0;
}
}
~TextInferenceEngineImpl() {