mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 00:54:41 +00:00
Fixed some GGUFv1 loading bugs, long overdue cleanup for compiling, integrated TTS
tts is functional (+6 squashed commit) Squashed commit: [22396311] wip tts [3a883027] tts not yet working [0dcfab0e] fix silly bug [a378d9ef] some long overdue cleanup [fc5a6fb5] Wip tts [39f50497] wip TTS integration
This commit is contained in:
parent
12cdcf0abe
commit
b3de1598e7
17 changed files with 1175 additions and 271 deletions
|
@ -21,12 +21,13 @@
|
|||
#include <cctype>
|
||||
#include <locale>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
//for easier compilation
|
||||
//concat source files into one file for compilation purposes
|
||||
#include "llama_v2.cpp"
|
||||
#include "llama_v3.cpp"
|
||||
#include "src/llama.cpp"
|
||||
#include "utils.cpp"
|
||||
#include "gptj_v1.cpp"
|
||||
#include "gptj_v2.cpp"
|
||||
#include "gptj_v3.cpp"
|
||||
|
@ -535,99 +536,6 @@ const char * kcpp_print_system_info(void) {
|
|||
return s.c_str();
|
||||
}
|
||||
|
||||
struct kcpp_embd_batch { //duplcated from llava_embd_batch
|
||||
std::vector<int32_t> pos;
|
||||
std::vector<int32_t> n_seq_id;
|
||||
std::vector<int32_t> seq_id_0;
|
||||
std::vector<int32_t *> seq_ids;
|
||||
std::vector<int8_t> logits;
|
||||
llama_batch batch;
|
||||
kcpp_embd_batch(float * embd, int32_t n_tokens, int32_t npast, bool use_mrope) {
|
||||
int32_t seq_id = 0;
|
||||
pos.resize(n_tokens * (use_mrope?4:1));
|
||||
std::fill(pos.begin(), pos.end(), 0);
|
||||
n_seq_id.resize(n_tokens);
|
||||
seq_ids.resize(n_tokens + 1);
|
||||
logits.resize(n_tokens);
|
||||
seq_id_0.resize(1);
|
||||
seq_id_0[0] = seq_id;
|
||||
seq_ids [n_tokens] = nullptr;
|
||||
batch = {
|
||||
/*n_tokens =*/ n_tokens,
|
||||
/*tokens =*/ nullptr,
|
||||
/*embd =*/ embd,
|
||||
/*pos =*/ pos.data(),
|
||||
/*n_seq_id =*/ n_seq_id.data(),
|
||||
/*seq_id =*/ seq_ids.data(),
|
||||
/*logits =*/ logits.data(),
|
||||
};
|
||||
|
||||
if(!use_mrope)
|
||||
{
|
||||
for (int i = 0; i < n_tokens; i++) {
|
||||
batch.pos [i] = npast + i;
|
||||
batch.n_seq_id[i] = 1;
|
||||
batch.seq_id [i] = seq_id_0.data();
|
||||
batch.logits [i] = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < n_tokens; i++) {
|
||||
batch.n_seq_id[i] = 1;
|
||||
batch.seq_id [i] = seq_id_0.data();
|
||||
batch.logits [i] = false;
|
||||
}
|
||||
for (int j = 0; j < batch.n_tokens * 3; j++) {
|
||||
batch.pos[j] = npast + (j % batch.n_tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
kcpp_embd_batch(std::vector<llama_token> & tokens, int32_t npast, bool use_mrope, bool return_all_logits) {
|
||||
int32_t seq_id = 0;
|
||||
int32_t n_tokens = tokens.size();
|
||||
pos.resize(n_tokens * (use_mrope?4:1));
|
||||
std::fill(pos.begin(), pos.end(), 0);
|
||||
n_seq_id.resize(n_tokens);
|
||||
seq_ids.resize(n_tokens + 1);
|
||||
logits.resize(n_tokens);
|
||||
seq_id_0.resize(1);
|
||||
seq_id_0[0] = seq_id;
|
||||
seq_ids[n_tokens] = nullptr;
|
||||
batch = {
|
||||
/*n_tokens =*/ n_tokens,
|
||||
/*tokens =*/ tokens.data(),
|
||||
/*embd =*/ nullptr,
|
||||
/*pos =*/ pos.data(),
|
||||
/*n_seq_id =*/ n_seq_id.data(),
|
||||
/*seq_id =*/ seq_ids.data(),
|
||||
/*logits =*/ logits.data(),
|
||||
};
|
||||
|
||||
if(!use_mrope)
|
||||
{
|
||||
for (int i = 0; i < n_tokens; i++) {
|
||||
batch.pos [i] = npast + i;
|
||||
batch.n_seq_id[i] = 1;
|
||||
batch.seq_id [i] = seq_id_0.data();
|
||||
batch.logits [i] = (return_all_logits?true:false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < n_tokens; i++) {
|
||||
batch.n_seq_id[i] = 1;
|
||||
batch.seq_id [i] = seq_id_0.data();
|
||||
batch.logits [i] = (return_all_logits?true:false);
|
||||
}
|
||||
for (int j = 0; j < batch.n_tokens * 3; j++) {
|
||||
batch.pos[j] = npast + (j % batch.n_tokens);
|
||||
}
|
||||
}
|
||||
batch.logits[n_tokens - 1] = true;
|
||||
}
|
||||
};
|
||||
|
||||
//loads a model for speculative decoding.
|
||||
static void speculative_decoding_setup(std::string spec_model_filename, const llama_model_params & base_model_params, const llama_context_params & base_ctx_params, int base_n_vocab, const float * draft_gpusplit, int draft_gpulayers)
|
||||
{
|
||||
|
@ -664,7 +572,7 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll
|
|||
draft_ctx_params.type_k = base_ctx_params.type_k;
|
||||
draft_ctx_params.type_v = base_ctx_params.type_v;
|
||||
|
||||
llama_model * draftmodel = llama_load_model_from_file(spec_model_filename.c_str(), draft_model_params);
|
||||
llama_model * draftmodel = llama_model_load_from_file(spec_model_filename.c_str(), draft_model_params);
|
||||
draft_ctx = llama_new_context_with_model(draftmodel, draft_ctx_params);
|
||||
if(draft_ctx == NULL)
|
||||
{
|
||||
|
@ -2252,7 +2160,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
kvos.push_back(kvo);
|
||||
model_params.kv_overrides = kvos.data();
|
||||
}
|
||||
llama_model * llamamodel = llama_load_model_from_file(kcpp_data->model_filename.c_str(), model_params);
|
||||
llama_model * llamamodel = llama_model_load_from_file(kcpp_data->model_filename.c_str(), model_params);
|
||||
|
||||
if(overwriteRope)
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue