mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-09 19:46:11 +00:00
kv-cache : support V-less cache (#19067)
* kv-cache : support V-less cache
* cuda : better check for V_is_K_view
* cuda : improve V_is_K_view check
* graph : add comments
* hparams : refactor
This commit is contained in:
parent
70d860824a
commit
d9c6ce46f7
11 changed files with 246 additions and 53 deletions
|
|
@ -793,7 +793,7 @@ float * llama_context::get_embeddings_ith(int32_t i) {
|
|||
throw std::runtime_error(format("corrupt output buffer (j=%" PRId64 ", n_outputs=%d)", j, n_outputs));
|
||||
}
|
||||
|
||||
const uint32_t n_embd_out = model.hparams.get_n_embd_out();
|
||||
const uint32_t n_embd_out = model.hparams.n_embd_out();
|
||||
return embd + j*n_embd_out;
|
||||
} catch (const std::exception & err) {
|
||||
LLAMA_LOG_ERROR("%s: invalid embeddings id %d, reason: %s\n", __func__, i, err.what());
|
||||
|
|
@ -1279,7 +1279,7 @@ int llama_context::encode(const llama_batch & batch_inp) {
|
|||
{
|
||||
// extract token embeddings
|
||||
GGML_ASSERT(embd != nullptr);
|
||||
const uint32_t n_embd_out = hparams.get_n_embd_out();
|
||||
const uint32_t n_embd_out = hparams.n_embd_out();
|
||||
|
||||
GGML_ASSERT(n_tokens*n_embd_out <= (int64_t) embd_size);
|
||||
ggml_backend_tensor_get_async(backend_embd, t_embd, embd, 0, n_tokens*n_embd_out*sizeof(float));
|
||||
|
|
@ -1688,7 +1688,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
|
|||
{
|
||||
// extract token embeddings
|
||||
GGML_ASSERT(embd != nullptr);
|
||||
const uint32_t n_embd_out = hparams.get_n_embd_out();
|
||||
const uint32_t n_embd_out = hparams.n_embd_out();
|
||||
float * embd_out = embd + n_outputs_prev*n_embd_out;
|
||||
|
||||
if (n_outputs) {
|
||||
|
|
@ -1821,7 +1821,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs, const llama_batch & ba
|
|||
|
||||
const auto n_batch = cparams.n_batch;
|
||||
const auto n_vocab = vocab.n_tokens();
|
||||
const auto n_embd_out = hparams.get_n_embd_out();
|
||||
const auto n_embd_out = hparams.n_embd_out();
|
||||
|
||||
bool has_logits = true;
|
||||
bool has_embd = cparams.embeddings;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue