Merge branch 'master' into concedo_experimental

# Conflicts:
#	.devops/nix/package.nix
#	CMakeLists.txt
#	README.md
#	ggml-metal.m
#	ggml.c
Concedo 2024-01-08 14:18:49 +08:00
commit f04b6e7287
18 changed files with 195 additions and 191 deletions

llama.cpp

@@ -2191,7 +2191,11 @@ struct llama_model_loader {
type_max = type;
}
// LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, name, ggml_type_name(meta->type), llama_format_tensor_shape(meta).c_str());
// TODO: make runtime configurable
#if 0
struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n", __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
#endif
}
switch (type_max) {
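
The #if 0 block introduced above keeps the per-tensor dump compile-time only; the TODO asks for a runtime switch. A minimal sketch of one way that could look, assuming a hypothetical LLAMA_DEBUG_TENSORS environment variable that is not part of llama.cpp:

    // Sketch only: LLAMA_DEBUG_TENSORS is an assumed name, not an existing llama.cpp option.
    #include <cstdlib>

    static bool llama_debug_tensors_enabled() {
        const char * env = std::getenv("LLAMA_DEBUG_TENSORS");
        return env != nullptr && env[0] != '\0' && env[0] != '0';
    }

    // inside the tensor loop of llama_model_loader, in place of the #if 0 guard:
    if (llama_debug_tensors_enabled()) {
        struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
        LLAMA_LOG_INFO("%s: - tensor %4d: %32s %-8s [ %s ]\n",
            __func__, i, ggml_get_name(meta), ggml_type_name(type), llama_format_tensor_shape(meta).c_str());
    }

The same toggle could instead be wired to a command-line flag; the environment variable is only the smallest possible example.
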
@@ -4801,7 +4805,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -4925,7 +4928,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * pos;
@@ -5024,9 +5026,7 @@ struct llm_build_context {
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
const int64_t n_rot = n_embd_head_k / 2;
@@ -5238,9 +5238,7 @@ struct llm_build_context {
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -5333,7 +5331,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -5429,7 +5426,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -5756,7 +5752,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * attn_norm_output;
@@ -5980,7 +5975,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * pos;
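
The remaining hunks each drop the check GGML_ASSERT(n_embd_gqa == n_embd); from the per-architecture graph builders in llm_build_context. That equality only holds when every query head has its own KV head; with grouped-query attention the KV projection is narrower than the embedding, so the check cannot be kept for GQA-capable builders. A rough illustration with made-up head counts (not taken from any model in this commit):

    // Illustration only: hypothetical dimensions, not from the commit.
    const int64_t n_embd_head_v = 128;                      // width of one value head
    const int64_t n_head        = 32;                       // query heads
    const int64_t n_head_kv     = 8;                        // KV heads (grouped-query attention)

    const int64_t n_embd     = n_embd_head_v * n_head;      // 4096
    const int64_t n_embd_gqa = n_embd_head_v * n_head_kv;   // 1024

    // GGML_ASSERT(n_embd_gqa == n_embd) would fail here (1024 != 4096),
    // even though such a model is perfectly valid.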