fix: llama.cpp warmup logic

feat-adding-an-auto-completion-component
Meng Zhang 2023-11-04 14:12:31 -07:00
parent bdd9e6c18a
commit c7c67c2f90
1 changed file with 3 additions and 2 deletions

View File

@@ -90,7 +90,8 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
batch_ = llama_batch_init(N_CTX * N_CONCURRENT_REQUESTS, 0, 1);
// warm up
{
for (int i = 0; i < 16; ++i) {
batch_.n_tokens = 16;
for (int i = 0; i < batch_.n_tokens; ++i) {
batch_.token[i] = 0;
batch_.pos[i] = i;
batch_.n_seq_id[0] = 1;
@@ -98,7 +99,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
batch_.logits[i] = false;
}
if (!llama_decode(ctx_.get(), batch_)) {
if (llama_decode(ctx_.get(), batch_)) {
fprintf(stderr, "%s: warmup failed\n", __func__);
}