Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 09:34:37 +00:00)
Merge branch 'master' into concedo_experimental
# Conflicts:
#	.devops/nix/package.nix
#	CMakeLists.txt
#	README.md
#	ggml-metal.m
#	ggml.c
commit f04b6e7287
18 changed files with 195 additions and 191 deletions
llama.cpp | 16
@@ -2191,7 +2191,11 @@ struct llama_model_loader {
                 type_max = type;
             }
 
-            // LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, name, ggml_type_name(meta->type), llama_format_tensor_shape(meta).c_str());
+            // TODO: make runtime configurable
+#if 0
+            struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
+            LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
+#endif
         }
 
         switch (type_max) {
@@ -4801,7 +4805,6 @@ struct llm_build_context {
         const int64_t n_embd_head = hparams.n_embd_head_v;
         const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
-        GGML_ASSERT(n_embd_gqa == n_embd);
 
         struct ggml_tensor * cur;
         struct ggml_tensor * inpL;
@@ -4925,7 +4928,6 @@ struct llm_build_context {
         const int64_t n_embd_head = hparams.n_embd_head_v;
         const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
-        GGML_ASSERT(n_embd_gqa == n_embd);
 
         struct ggml_tensor * cur;
         struct ggml_tensor * pos;
@@ -5024,9 +5026,7 @@ struct llm_build_context {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
 
         const int64_t n_embd_head = hparams.n_embd_head_v;
-        const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
-        GGML_ASSERT(n_embd_gqa == n_embd);
 
         const int64_t n_rot = n_embd_head_k / 2;
 
@@ -5238,9 +5238,7 @@ struct llm_build_context {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
 
         const int64_t n_embd_head = hparams.n_embd_head_v;
-        const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
-        GGML_ASSERT(n_embd_gqa == n_embd);
 
         struct ggml_tensor * cur;
         struct ggml_tensor * inpL;
@@ -5333,7 +5331,6 @@ struct llm_build_context {
         const int64_t n_embd_head = hparams.n_embd_head_v;
         const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
-        GGML_ASSERT(n_embd_gqa == n_embd);
 
         struct ggml_tensor * cur;
         struct ggml_tensor * inpL;
@@ -5429,7 +5426,6 @@ struct llm_build_context {
         const int64_t n_embd_head = hparams.n_embd_head_v;
         const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
-        GGML_ASSERT(n_embd_gqa == n_embd);
 
         struct ggml_tensor * cur;
         struct ggml_tensor * inpL;
@@ -5756,7 +5752,6 @@ struct llm_build_context {
         const int64_t n_embd_head = hparams.n_embd_head_v;
         const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
-        GGML_ASSERT(n_embd_gqa == n_embd);
 
         struct ggml_tensor * cur;
         struct ggml_tensor * attn_norm_output;
@@ -5980,7 +5975,6 @@ struct llm_build_context {
         const int64_t n_embd_head = hparams.n_embd_head_v;
         const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
-        GGML_ASSERT(n_embd_gqa == n_embd);
 
         struct ggml_tensor * cur;
         struct ggml_tensor * pos;