Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-16 20:09:41 +00:00)
Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	README.md
#	examples/CMakeLists.txt
#	examples/batched/batched.cpp
#	examples/gritlm/gritlm.cpp
#	examples/llama.android/llama/build.gradle.kts
#	examples/main/README.md
#	examples/retrieval/retrieval.cpp
#	examples/server/CMakeLists.txt
#	examples/server/README.md
#	ggml/CMakeLists.txt
#	ggml/src/ggml-cpu/CMakeLists.txt
#	ggml/src/ggml.c
#	scripts/compare-commits.sh
#	scripts/sync-ggml.last
#	tests/CMakeLists.txt
#	tests/test-backend-ops.cpp
#	tests/test-chat-template.cpp
#	tests/test-sampling.cpp
Commit ee486bad3e
59 changed files with 20531 additions and 13185 deletions
```diff
@@ -482,9 +482,6 @@ extern "C" {
     // Returns the total number of parameters in the model
     LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model);
 
-    // Get a llama model tensor
-    LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);
-
     // Returns true if the model contains an encoder that requires llama_encode() call
     LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
```
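The symbol removed in this hunk is `llama_get_model_tensor`; the two neighboring declarations are unchanged context. A minimal sketch of code that still compiles against the post-merge header, using only the surviving declarations (the `print_model_info` helper is hypothetical, not from this commit):

```c
#include <stdio.h>
#include "llama.h"

// Hypothetical helper: uses the model-introspection APIs that remain after
// this hunk. llama_get_model_tensor() is gone, so looking up a tensor by
// name is no longer part of the public C API.
static void print_model_info(const struct llama_model * model) {
    // Total number of parameters in the model.
    printf("n_params: %llu\n", (unsigned long long) llama_model_n_params(model));

    // Encoder-decoder models require llama_encode() before decoding.
    if (llama_model_has_encoder(model)) {
        printf("model has an encoder; call llama_encode() first\n");
    }
}
```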
```diff
@@ -1141,16 +1138,12 @@ extern "C" {
                            const char * grammar_str,
                            const char * grammar_root);
 
     /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
     LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
-            int32_t   n_vocab,         // llama_n_vocab()
-        llama_token   special_eos_id,  // llama_token_eos()
-        llama_token   linefeed_id,     // llama_token_nl()
-            int32_t   penalty_last_n,  // last n tokens to penalize (0 = disable penalty, -1 = context size)
-              float   penalty_repeat,  // 1.0 = disabled
-              float   penalty_freq,    // 0.0 = disabled
-              float   penalty_present, // 0.0 = disabled
-               bool   penalize_nl,     // consider newlines as a repeatable token
-               bool   ignore_eos);     // ignore the end-of-sequence token
+            int32_t   penalty_last_n,  // last n tokens to penalize (0 = disable penalty, -1 = context size)
+              float   penalty_repeat,  // 1.0 = disabled
+              float   penalty_freq,    // 0.0 = disabled
+              float   penalty_present); // 0.0 = disabled
 
     /// @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
     LLAMA_API struct llama_sampler * llama_sampler_init_dry(
```
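The duplicated parameter lines in this hunk are the old and new versions of `llama_sampler_init_penalties`: the vocabulary and token arguments (`n_vocab`, `special_eos_id`, `linefeed_id`) and the `penalize_nl`/`ignore_eos` flags are dropped, leaving only the four penalty parameters. A hedged sketch of a call-site migration (the values 64 and 1.1f are illustrative, and the sampler chain this would attach to is assumed, not shown):

```c
#include "llama.h"

// Illustrative call-site update for the llama_sampler_init_penalties()
// signature change in this hunk.
struct llama_sampler * make_penalties_sampler(void) {
    // Before this merge the call also took n_vocab, special_eos_id,
    // linefeed_id, penalize_nl, and ignore_eos; those arguments are gone.
    return llama_sampler_init_penalties(
        /*penalty_last_n  =*/ 64,    // penalize the last 64 tokens (-1 = context size)
        /*penalty_repeat  =*/ 1.1f,  // 1.0 = disabled
        /*penalty_freq    =*/ 0.0f,  // 0.0 = disabled
        /*penalty_present =*/ 0.0f); // 0.0 = disabled
}
```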