Mirror of https://github.com/LostRuins/koboldcpp.git
Synced 2025-09-11 01:24:36 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.github/workflows/build.yml
#	README.md
#	ci/run.sh
#	docs/build.md
#	examples/CMakeLists.txt
#	examples/parallel/parallel.cpp
#	ggml/CMakeLists.txt
#	ggml/src/CMakeLists.txt
#	scripts/server-bench.py
#	src/llama-kv-cache-unified.cpp
#	tests/test-backend-ops.cpp
#	tools/batched-bench/batched-bench.cpp
#	tools/server/README.md
Commit bdff33e0de
47 changed files with 3128 additions and 509 deletions
@@ -338,6 +338,9 @@ extern "C" {
         bool swa_full;   // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
                          // NOTE: setting to false when n_seq_max > 1 can cause bad performance in some cases
                          //       ref: https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573
+        bool kv_unified; // use a unified buffer across the input sequences when computing the attention
+                         // try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix
+                         // ref: https://github.com/ggml-org/llama.cpp/pull/14363
     };

     // model quantization parameters
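For readers of the new flag: a minimal sketch of how a caller might set it, assuming the llama.cpp context API from this header (llama_context_default_params, llama_init_from_model) and an already-loaded model; the n_seq_max value is illustrative.

    #include "llama.h"

    // Sketch: a context serving several unrelated sequences, with the unified
    // KV buffer disabled as the new comment suggests.
    static struct llama_context * make_multi_seq_ctx(struct llama_model * model) {
        struct llama_context_params params = llama_context_default_params();
        params.n_seq_max  = 8;      // illustrative: 8 independent sequences
        params.kv_unified = false;  // per-sequence buffers; can help when the
                                    // sequences do not share a large prefix
        return llama_init_from_model(model, params);
    }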
@@ -728,7 +731,7 @@ extern "C" {
     //   - lazily on next llama_decode()
     //   p0 < 0 : [0,  p1]
     //   p1 < 0 : [p0, inf)
-    DEPRECATED(void llama_kv_self_seq_div(
+    DEPRECATED(LLAMA_API void llama_kv_self_seq_div(
                        struct llama_context * ctx,
                               llama_seq_id   seq_id,
                               llama_pos      p0,
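The negative-bound convention above means passing -1 for both p0 and p1 covers a sequence's whole range. A hedged usage sketch of the deprecated wrapper, assuming its remaining parameters are llama_pos p1 and int d as in upstream llama.cpp:

    #include "llama.h"

    // Sketch: halve every position in sequence 0, e.g. as part of a
    // context-shift trick. p0 = -1 and p1 = -1 select [0, inf) per the
    // comments above.
    static void halve_positions(struct llama_context * ctx) {
        llama_kv_self_seq_div(ctx, /*seq_id=*/0, /*p0=*/-1, /*p1=*/-1, /*d=*/2);
    }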
@@ -1008,6 +1011,7 @@ extern "C" {
     LLAMA_API llama_token llama_vocab_sep(const struct llama_vocab * vocab); // sentence separator
     LLAMA_API llama_token llama_vocab_nl (const struct llama_vocab * vocab); // next-line
     LLAMA_API llama_token llama_vocab_pad(const struct llama_vocab * vocab); // padding
+    LLAMA_API llama_token llama_vocab_mask(const struct llama_vocab * vocab); // mask

     LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
     LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
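The new accessor mirrors its neighbors; a brief sketch of querying it, assuming (as for the other special-token getters in upstream llama.cpp) that it returns LLAMA_TOKEN_NULL when the model defines no mask token:

    #include <stdbool.h>
    #include "llama.h"

    // Sketch: does the loaded model define a [MASK] token? Typically false
    // for causal LMs; mask tokens mostly appear in BERT-style vocabularies.
    static bool has_mask_token(const struct llama_model * model) {
        const struct llama_vocab * vocab = llama_model_get_vocab(model);
        return llama_vocab_mask(vocab) != LLAMA_TOKEN_NULL;
    }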