mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
broken commit
This commit is contained in:
commit
2a00ee8fa8
36 changed files with 5868 additions and 5479 deletions
|
@ -6,7 +6,6 @@
|
|||
#include "llama.h"
|
||||
#include "build-info.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
|
@ -164,6 +163,8 @@ int main(int argc, char ** argv) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
LOG_INF("%s: llama threadpool init, n_threads = %d\n", __func__, (int) params.cpuparams.n_threads);
|
||||
|
||||
auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
|
||||
|
@ -197,7 +198,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
llama_attach_threadpool(ctx, threadpool, threadpool_batch);
|
||||
|
||||
const int n_ctx_train = llama_n_ctx_train(model);
|
||||
const int n_ctx_train = llama_model_n_ctx_train(model);
|
||||
const int n_ctx = llama_n_ctx(ctx);
|
||||
|
||||
if (n_ctx > n_ctx_train) {
|
||||
|
@ -242,9 +243,9 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
const bool add_bos = llama_add_bos_token(model);
|
||||
const bool add_bos = llama_vocab_get_add_bos(vocab);
|
||||
if (!llama_model_has_encoder(model)) {
|
||||
GGML_ASSERT(!llama_add_eos_token(model));
|
||||
GGML_ASSERT(!llama_vocab_get_add_eos(vocab));
|
||||
}
|
||||
|
||||
LOG_DBG("n_ctx: %d, add_bos: %d\n", n_ctx, add_bos);
|
||||
|
@ -270,7 +271,7 @@ int main(int argc, char ** argv) {
|
|||
// Should not run without any tokens
|
||||
if (embd_inp.empty()) {
|
||||
if (add_bos) {
|
||||
embd_inp.push_back(llama_token_bos(model));
|
||||
embd_inp.push_back(llama_vocab_bos(vocab));
|
||||
LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
|
||||
} else {
|
||||
LOG_ERR("input is empty\n");
|
||||
|
@ -496,7 +497,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
|
||||
if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
|
||||
decoder_start_token_id = llama_token_bos(model);
|
||||
decoder_start_token_id = llama_vocab_bos(vocab);
|
||||
}
|
||||
|
||||
embd_inp.clear();
|
||||
|
@ -743,7 +744,7 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
// deal with end of generation tokens in interactive mode
|
||||
if (llama_token_is_eog(model, common_sampler_last(smpl))) {
|
||||
if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
|
||||
LOG_DBG("found an EOG token\n");
|
||||
|
||||
if (params.interactive) {
|
||||
|
@ -777,7 +778,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
if (params.input_prefix_bos) {
|
||||
LOG_DBG("adding input prefix BOS token\n");
|
||||
embd_inp.push_back(llama_token_bos(model));
|
||||
embd_inp.push_back(llama_vocab_bos(vocab));
|
||||
}
|
||||
|
||||
std::string buffer;
|
||||
|
@ -831,8 +832,8 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// if user stop generation mid-way, we must add EOT to finish model's last response
|
||||
if (need_insert_eot && format_chat) {
|
||||
llama_token eot = llama_token_eot(model);
|
||||
embd_inp.push_back(eot == LLAMA_TOKEN_NULL ? llama_token_eos(model) : eot);
|
||||
llama_token eot = llama_vocab_eot(vocab);
|
||||
embd_inp.push_back(eot == LLAMA_TOKEN_NULL ? llama_vocab_eos(vocab) : eot);
|
||||
need_insert_eot = false;
|
||||
}
|
||||
|
||||
|
@ -867,7 +868,7 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
// end of generation
|
||||
if (!embd.empty() && llama_token_is_eog(model, embd.back()) && !(params.interactive)) {
|
||||
if (!embd.empty() && llama_vocab_is_eog(vocab, embd.back()) && !(params.interactive)) {
|
||||
LOG(" [end of text]\n");
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue