From 138b7459c53b2d96ca36e4b379127372d219f748 Mon Sep 17 00:00:00 2001
From: Erfan Safari
Date: Thu, 9 Nov 2023 23:24:23 +0330
Subject: [PATCH] feat: add LLAMA_CPP_N_THREADS env (#742)

* feat: add LLAMA_CPP_N_THREADS and LLAMA_CPP_N_THREADS_BATCH envs

* apply format

* improve: use LLAMA_CPP_N_THREADS for both n_threads and n_threads_batch

* Update crates/llama-cpp-bindings/src/engine.cc

---------

Co-authored-by: Meng Zhang
---
 crates/llama-cpp-bindings/src/engine.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/crates/llama-cpp-bindings/src/engine.cc b/crates/llama-cpp-bindings/src/engine.cc
index 3c261ae..2a762b5 100644
--- a/crates/llama-cpp-bindings/src/engine.cc
+++ b/crates/llama-cpp-bindings/src/engine.cc
@@ -316,8 +316,12 @@ std::unique_ptr<TextInferenceEngine> create_engine(bool use_gpu, rust::Str model
   llama_context_params ctx_params = llama_context_default_params();
   ctx_params.n_ctx = N_CTX * parallelism;
   ctx_params.n_batch = N_BATCH;
+  if (const char* n_thread_str = std::getenv("LLAMA_CPP_N_THREADS")) {
+    int n_threads = std::stoi(n_thread_str);
+    ctx_params.n_threads = n_threads;
+    ctx_params.n_threads_batch = n_threads;
+  }
   llama_context* ctx = llama_new_context_with_model(model, ctx_params);
-
   return std::make_unique<TextInferenceEngineImpl>(
       owned(model, llama_free_model),
       owned(ctx, llama_free),