fix: llama.cpp warmup logic

r0.5
Meng Zhang 2023-11-04 14:12:31 -07:00
parent 0b6108dfc2
commit 01ce18fe1a
1 changed files with 3 additions and 2 deletions

View File

@ -90,7 +90,8 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
batch_ = llama_batch_init(N_CTX * N_CONCURRENT_REQUESTS, 0, 1); batch_ = llama_batch_init(N_CTX * N_CONCURRENT_REQUESTS, 0, 1);
// warm up // warm up
{ {
for (int i = 0; i < 16; ++i) { batch_.n_tokens = 16;
for (int i = 0; i < batch_.n_tokens; ++i) {
batch_.token[i] = 0; batch_.token[i] = 0;
batch_.pos[i] = i; batch_.pos[i] = i;
batch_.n_seq_id[0] = 1; batch_.n_seq_id[0] = 1;
@ -98,7 +99,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
batch_.logits[i] = false; batch_.logits[i] = false;
} }
if (!llama_decode(ctx_.get(), batch_)) { if (llama_decode(ctx_.get(), batch_)) {
fprintf(stderr, "%s: warmup failed\n", __func__); fprintf(stderr, "%s: warmup failed\n", __func__);
} }