mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-14 19:09:45 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.github/workflows/bench.yml
#	.github/workflows/build.yml
#	.github/workflows/python-check-requirements.yml
#	README.md
#	docs/backend/SYCL.md
#	flake.lock
#	ggml/CMakeLists.txt
#	ggml/src/kompute-shaders/op_rope_f16.comp
#	ggml/src/kompute-shaders/op_rope_f32.comp
#	ggml/src/kompute-shaders/rope_common.comp
commit e8de0af3ec
18 changed files with 1326 additions and 101 deletions
@@ -3594,13 +3594,8 @@ namespace GGUFMeta {
 
 using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
 
-// TODO: update when needed or think of some clever automatic way to do this
-static size_t llama_model_max_nodes(const llama_model & /*model*/) {
-    //if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
-    //    return 32768;
-    //}
-
-    return 8192;
+static size_t llama_model_max_nodes(const llama_model & model) {
+    return std::max<size_t>(8192, model.tensors_by_name.size()*5);
 }
 
 struct llama_model_loader {
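This hunk replaces the hard-coded 8192-node graph budget (and the commented-out special case for llama-3 405B) with a value that scales with the model's tensor count, so very large models no longer overflow the compute graph. Below is a minimal stand-alone sketch of the new policy, with a hypothetical max_nodes_for() taking the tensor count directly in place of model.tensors_by_name.size():

#include <algorithm>
#include <cstddef>
#include <cstdio>

// Hypothetical stand-alone version of the new budget: never fewer than the
// old 8192-node floor, otherwise 5 graph nodes per model tensor.
static size_t max_nodes_for(size_t n_tensors) {
    return std::max<size_t>(8192, n_tensors * 5);
}

int main() {
    printf("%zu\n", max_nodes_for(291));   // small model: stays at the 8192 floor
    printf("%zu\n", max_nodes_for(3000));  // huge model: grows to 15000
}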
@@ -14800,12 +14795,15 @@ static int llama_decode_internal(
         res  = nullptr;
         embd = nullptr;
     } else if (cparams.embeddings) {
-        res = nullptr; // do not extract logits for embedding case
-        embd = gf->nodes[gf->n_nodes - 1];
-        if (strcmp(embd->name, "result_embd_pooled") != 0) {
-            embd = gf->nodes[gf->n_nodes - 2];
+        res  = nullptr; // do not extract logits for embedding case
+        embd = nullptr;
+        for (int i = gf->n_nodes - 1; i >= 0; --i) {
+            if (strcmp(gf->nodes[i]->name, "result_embd_pooled") == 0) {
+                embd = gf->nodes[i];
+                break;
+            }
         }
-        GGML_ASSERT(strcmp(embd->name, "result_embd_pooled") == 0 && "missing embeddings tensor");
+        GGML_ASSERT(embd != nullptr && "missing embeddings tensor");
     } else {
         embd = nullptr; // do not extract embeddings when not needed
         GGML_ASSERT(strcmp(res->name, "result_output") == 0 && "missing result_output tensor");
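This hunk hardens embedding extraction for graphs where result_embd_pooled is not the last (or second-to-last) node, e.g. when a backend appends extra ops after it: the decoder now scans the whole graph backwards and asserts only that the node was found. Below is a minimal self-contained sketch of the same search, using a hypothetical node struct in place of ggml_tensor:

#include <cstdio>
#include <cstring>

// Hypothetical stand-in for ggml_tensor: only the name matters here.
struct node { const char * name; };

// Same search policy as the new code: walk the graph from the back and take
// the first node named "result_embd_pooled", instead of assuming a fixed
// position at the end of the graph.
static const node * find_embd_pooled(const node * nodes, int n_nodes) {
    for (int i = n_nodes - 1; i >= 0; --i) {
        if (strcmp(nodes[i].name, "result_embd_pooled") == 0) {
            return &nodes[i];
        }
    }
    return nullptr;
}

int main() {
    // extra ops after the pooled-embedding node are exactly what broke the
    // old fixed-offset lookup
    node graph[] = { {"inp"}, {"result_embd_pooled"}, {"cpy"}, {"transpose"} };
    const node * embd = find_embd_pooled(graph, 4);
    printf("found: %s\n", embd ? embd->name : "(none)");
}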