feat: add LLAMA_CPP_N_THREADS env (#742)
* feat: add LLAMA_CPP_N_THREADS and LLAMA_CPP_N_THREADS_BATCH envs * apply format * improve: use LLAMA_CPP_N_THREADS for both n_threads and n_threads_batch * Update crates/llama-cpp-bindings/src/engine.cc --------- Co-authored-by: Meng Zhang <meng@tabbyml.com>
refactor-extract-code
parent
4bbcdfa38e
commit
138b7459c5
|
|
@@ -316,8 +316,12 @@ std::unique_ptr<TextInferenceEngine> create_engine(bool use_gpu, rust::Str model
|
|||
llama_context_params ctx_params = llama_context_default_params();
|
||||
ctx_params.n_ctx = N_CTX * parallelism;
|
||||
ctx_params.n_batch = N_BATCH;
|
||||
if (const char* n_thread_str = std::getenv("LLAMA_CPP_N_THREADS")) {
|
||||
int n_threads = std::stoi(n_thread_str);
|
||||
ctx_params.n_threads = n_threads;
|
||||
ctx_params.n_threads_batch = n_threads;
|
||||
}
|
||||
llama_context* ctx = llama_new_context_with_model(model, ctx_params);
|
||||
|
||||
return std::make_unique<TextInferenceEngineImpl>(
|
||||
owned<llama_model>(model, llama_free_model),
|
||||
owned<llama_context>(ctx, llama_free),
|
||||
|
|
|
|||
Loading…
Reference in New Issue