mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-13 02:19:41 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .devops/full-rocm.Dockerfile # .devops/llama-cli-rocm.Dockerfile # .devops/llama-server-rocm.Dockerfile # .github/workflows/build.yml # .github/workflows/python-type-check.yml # CMakeLists.txt # CONTRIBUTING.md # README.md # ci/run.sh # examples/embedding/embedding.cpp # examples/server/README.md # flake.lock # ggml/include/ggml.h # ggml/src/ggml.c # requirements/requirements-convert_legacy_llama.txt # scripts/sync-ggml.last # src/llama-vocab.cpp # src/llama.cpp # tests/test-backend-ops.cpp # tests/test-grad0.cpp # tests/test-tokenizer-0.cpp
This commit is contained in:
commit
ce7f9c9a2c
39 changed files with 103400 additions and 102738 deletions
|
@ -102,6 +102,7 @@ extern "C" {
|
|||
LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
|
||||
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
|
||||
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
|
||||
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
|
||||
};
|
||||
|
||||
enum llama_rope_type {
|
||||
|
@ -192,6 +193,7 @@ extern "C" {
|
|||
LLAMA_POOLING_TYPE_MEAN = 1,
|
||||
LLAMA_POOLING_TYPE_CLS = 2,
|
||||
LLAMA_POOLING_TYPE_LAST = 3,
|
||||
LLAMA_POOLING_TYPE_RANK = 4, // used by reranking models to attach the classification head to the graph
|
||||
};
|
||||
|
||||
enum llama_attention_type {
|
||||
|
@ -201,9 +203,9 @@ extern "C" {
|
|||
};
|
||||
|
||||
enum llama_split_mode {
|
||||
LLAMA_SPLIT_MODE_NONE = 0, // single GPU
|
||||
LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
|
||||
LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs
|
||||
LLAMA_SPLIT_MODE_NONE = 0, // single GPU
|
||||
LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
|
||||
LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs
|
||||
};
|
||||
|
||||
// TODO: simplify (https://github.com/ggerganov/llama.cpp/pull/9294#pullrequestreview-2286561979)
|
||||
|
@ -873,7 +875,8 @@ extern "C" {
|
|||
|
||||
// Get the embeddings for a sequence id
|
||||
// Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
|
||||
// shape: [n_embd] (1-dimensional)
|
||||
// when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[1] with the rank of the sequence
|
||||
// otherwise: float[n_embd] (1-dimensional)
|
||||
LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);
|
||||
|
||||
//
|
||||
|
@ -912,6 +915,8 @@ extern "C" {
|
|||
//
|
||||
// Tokenization
|
||||
//
|
||||
// The API is thread-safe.
|
||||
//
|
||||
|
||||
/// @details Convert the provided text into tokens.
|
||||
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue