mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Merge branch 'master' into concedo_experimental
# Conflicts: # tests/test-grad0.c
This commit is contained in:
commit
f5247be0d7
10 changed files with 5442 additions and 265 deletions
25
llama.cpp
25
llama.cpp
|
@ -1036,6 +1036,12 @@ static void llama_model_load_internal(
|
|||
case 40: model.type = e_model::MODEL_13B; break;
|
||||
case 60: model.type = e_model::MODEL_30B; break;
|
||||
case 80: model.type = e_model::MODEL_65B; break;
|
||||
default:
|
||||
{
|
||||
if (hparams.n_layer < 32) {
|
||||
model.type = e_model::MODEL_7B;
|
||||
}
|
||||
} break;
|
||||
}
|
||||
|
||||
hparams.n_ctx = n_ctx;
|
||||
|
@ -1200,6 +1206,7 @@ static void llama_model_load_internal(
|
|||
mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
|
||||
|
||||
(void) vram_scratch;
|
||||
(void) n_batch;
|
||||
#ifdef GGML_USE_CUBLAS
|
||||
vram_scratch = n_batch * MB;
|
||||
ggml_cuda_set_scratch_size(vram_scratch);
|
||||
|
@ -1227,6 +1234,7 @@ static void llama_model_load_internal(
|
|||
model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor);
|
||||
}
|
||||
|
||||
(void) tensor_split;
|
||||
#if defined(GGML_USE_CUBLAS)
|
||||
{
|
||||
ggml_cuda_set_tensor_split(tensor_split);
|
||||
|
@ -2161,6 +2169,10 @@ llama_token llama_sample_token_mirostat_v2(struct llama_context * ctx, llama_tok
|
|||
return -log2f(candidate.p) > *mu;
|
||||
}));
|
||||
|
||||
if (candidates->size == 0) {
|
||||
candidates->size = 1;
|
||||
}
|
||||
|
||||
// Normalize the probabilities of the remaining words
|
||||
llama_sample_softmax(ctx, candidates);
|
||||
|
||||
|
@ -3287,6 +3299,19 @@ int llama_n_embd(const struct llama_context * ctx) {
|
|||
return ctx->model.hparams.n_embd;
|
||||
}
|
||||
|
||||
int llama_get_vocab(
|
||||
const struct llama_context * ctx,
|
||||
const char * * strings,
|
||||
float * scores,
|
||||
int capacity) {
|
||||
int n = std::min(capacity, (int) ctx->vocab.id_to_token.size());
|
||||
for (int i = 0; i<n; ++i) {
|
||||
strings[i] = ctx->vocab.id_to_token[i].tok.c_str();
|
||||
scores[i] = ctx->vocab.id_to_token[i].score;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
float * llama_get_logits(struct llama_context * ctx) {
|
||||
return ctx->logits.data();
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue