feat: update llama.cpp to fetch latest starcoder support (#452)
* feat: bump llama.cpp to HEAD * fix: turn off add_bos by default (release-0.2)
parent
86e48afbe0
commit
97eeb6b926
|
|
@@ -1 +1 @@
|
|||
Subproject commit 06fc4020de0b92ee13407fdabca7870f53c75de5
|
||||
Subproject commit 31e7f3c20e1bacb522021e46788e24c045c108f6
|
||||
|
|
@@ -47,7 +47,7 @@ class TextInferenceEngineImpl : public TextInferenceEngine {
|
|||
uint32_t start(const rust::Str prompt, size_t max_input_length) const override {
|
||||
auto* ctx = ctx_.get();
|
||||
llama_reset_timings(ctx);
|
||||
std::vector<llama_token> tokens_list = tokenize(ctx, std::string(prompt), max_input_length, /* add_bos = */ true);
|
||||
std::vector<llama_token> tokens_list = tokenize(ctx, std::string(prompt), max_input_length, /* add_bos = */ false);
|
||||
|
||||
for (size_t i = 0; i < tokens_list.size(); i += N_BATCH) {
|
||||
const size_t size = std::min(N_BATCH, tokens_list.size() - i);
|
||||
|
|
|
|||
Loading…
Reference in New Issue