Merge branch 'master' into concedo_experimental

# Conflicts:
#	tests/test-grad0.c
2025-09-11 01:24:36 +00:00 · 2023-06-14 11:35:43 +08:00 · 2023-06-14 11:35:43 +08:00 · f5247be0d7
commit f5247be0d7
parent 2b4a286e56 9254920265
10 changed files with 5442 additions and 265 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -1036,6 +1036,12 @@ static void llama_model_load_internal(
            case 40: model.type = e_model::MODEL_13B; break;
            case 60: model.type = e_model::MODEL_30B; break;
            case 80: model.type = e_model::MODEL_65B; break;
+            default:
+                {
+                    if (hparams.n_layer < 32) {
+                        model.type = e_model::MODEL_7B;
+                    }
+                } break;
        }

        hparams.n_ctx = n_ctx;
@@ -1200,6 +1206,7 @@ static void llama_model_load_internal(
                mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);

        (void) vram_scratch;
+        (void) n_batch;
 #ifdef GGML_USE_CUBLAS
        vram_scratch = n_batch * MB;
        ggml_cuda_set_scratch_size(vram_scratch);
@@ -1227,6 +1234,7 @@ static void llama_model_load_internal(
        model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor);
    }

+    (void) tensor_split;
 #if defined(GGML_USE_CUBLAS)
    {
        ggml_cuda_set_tensor_split(tensor_split);
@@ -2161,6 +2169,10 @@ llama_token llama_sample_token_mirostat_v2(struct llama_context * ctx, llama_tok
        return -log2f(candidate.p) > *mu;
    }));

+    if (candidates->size == 0) {
+        candidates->size = 1;
+    }
+
    // Normalize the probabilities of the remaining words
    llama_sample_softmax(ctx, candidates);

@@ -3287,6 +3299,19 @@ int llama_n_embd(const struct llama_context * ctx) {
    return ctx->model.hparams.n_embd;
 }

+// Copies up to `capacity` vocabulary entries from ctx->vocab.id_to_token into the caller's arrays.
+// strings[i] receives a pointer to the token text held by the vocab (no copy); scores[i] its score.
+// Returns the number of entries written; a non-positive `capacity` writes nothing and returns 0.
+int llama_get_vocab(const struct llama_context * ctx, const char * * strings, float * scores, int capacity) {
+    const auto & entries = ctx->vocab.id_to_token;
+    const int n = capacity > 0 ? std::min(capacity, (int) entries.size()) : 0;
+    for (int i = 0; i < n; ++i) {
+        strings[i] = entries[i].tok.c_str();
+        scores[i]  = entries[i].score;
+    }
+    return n;
+}
+
 float * llama_get_logits(struct llama_context * ctx) { // accessor for the context's logits buffer
    return ctx->logits.data(); // hands back the buffer's own storage pointer; nothing is copied
 }