From c7c67c2f90b34974f95a551f0d7b8d876a4c9176 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Sat, 4 Nov 2023 14:12:31 -0700 Subject: [PATCH] fix: llama.cpp warmup logic --- crates/llama-cpp-bindings/src/engine.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/llama-cpp-bindings/src/engine.cc b/crates/llama-cpp-bindings/src/engine.cc index 5b48090e053..e0addccf314 100644 --- a/crates/llama-cpp-bindings/src/engine.cc +++ b/crates/llama-cpp-bindings/src/engine.cc @@ -90,7 +90,8 @@ class TextInferenceEngineImpl : public TextInferenceEngine { batch_ = llama_batch_init(N_CTX * N_CONCURRENT_REQUESTS, 0, 1); // warm up { - for (int i = 0; i < 16; ++i) { + batch_.n_tokens = 16; + for (int i = 0; i < batch_.n_tokens; ++i) { batch_.token[i] = 0; batch_.pos[i] = i; batch_.n_seq_id[0] = 1; @@ -98,7 +99,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine { batch_.logits[i] = false; } - if (!llama_decode(ctx_.get(), batch_)) { + if (llama_decode(ctx_.get(), batch_)) { fprintf(stderr, "%s: warmup failed\n", __func__); }