feat: add model warmup logic (#693)
parent: 0e4a2d2a12
commit: 4c7eae584e
|
|
@ -88,6 +88,22 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
|
|||
model_(std::move(model)),
      ctx_(std::move(ctx)) {
    batch_ = llama_batch_init(N_CTX * N_CONCURRENT_REQUESTS, 0, 1);

    // Warm up: run one short dummy decode so first-use initialization
    // (weight paging, KV-cache setup, kernel compilation) is paid here
    // instead of on the first real request.
    {
      constexpr int kWarmupTokens = 16;
      for (int i = 0; i < kWarmupTokens; ++i) {
        batch_.token[i] = 0;       // BOS-ish dummy token; content is irrelevant
        batch_.pos[i] = i;
        // FIX: was `batch_.n_seq_id[0] = 1;` — must be set per token, or
        // tokens 1..15 carry whatever llama_batch_init left in the array.
        batch_.n_seq_id[i] = 1;
        batch_.seq_id[i][0] = 0;   // all warmup tokens belong to sequence 0
        batch_.logits[i] = false;  // no logits needed for a warmup pass
      }
      // FIX: n_tokens was never set; llama_batch_init leaves it at 0, so the
      // decode below would process nothing and the warmup was a no-op.
      batch_.n_tokens = kWarmupTokens;

      // FIX: llama_decode returns 0 on success, non-zero on failure; the
      // original `if (!llama_decode(...))` reported failure on success and
      // swallowed real errors.
      if (llama_decode(ctx_.get(), batch_) != 0) {
        fprintf(stderr, "%s: warmup failed\n", __func__);
      }

      // Discard warmup state so real requests start from a clean KV cache.
      llama_kv_cache_clear(ctx_.get());
    }
  }
|
||||
|
||||
~TextInferenceEngineImpl() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue