mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .github/workflows/build.yml # .github/workflows/release.yml # .github/workflows/server.yml # README.md # docs/build.md # docs/install.md # ggml/src/ggml-cpu/CMakeLists.txt # ggml/src/ggml-opencl/CMakeLists.txt # ggml/src/ggml-opencl/ggml-opencl.cpp # ggml/src/ggml-sycl/ggml-sycl.cpp # ggml/src/ggml-sycl/mmvq.cpp # ggml/src/ggml-sycl/vecdotq.hpp # tests/test-backend-ops.cpp # tests/test-chat.cpp
This commit is contained in:
commit
bc89b465a8
35 changed files with 1070 additions and 288 deletions
|
@ -1,6 +1,7 @@
|
|||
#include "llama-kv-cache-recurrent.h"
|
||||
|
||||
#include "llama-impl.h"
|
||||
#include "llama-io.h"
|
||||
#include "llama-batch.h"
|
||||
#include "llama-model.h"
|
||||
|
||||
|
@ -386,6 +387,13 @@ llama_memory_state_ptr llama_kv_cache_recurrent::init_full() {
|
|||
return std::make_unique<llama_kv_cache_recurrent_state>(LLAMA_MEMORY_STATUS_SUCCESS, this);
|
||||
}
|
||||
|
||||
llama_memory_state_ptr llama_kv_cache_recurrent::init_update(llama_context * lctx, bool optimize) {
|
||||
GGML_UNUSED(lctx);
|
||||
GGML_UNUSED(optimize);
|
||||
|
||||
return std::make_unique<llama_kv_cache_recurrent_state>(LLAMA_MEMORY_STATUS_NO_UPDATE);
|
||||
}
|
||||
|
||||
bool llama_kv_cache_recurrent::prepare(const std::vector<llama_ubatch> & ubatches) {
|
||||
// simply remember the full state because it is very small for this type of cache
|
||||
// TODO: optimize
|
||||
|
@ -419,17 +427,6 @@ bool llama_kv_cache_recurrent::prepare(const std::vector<llama_ubatch> & ubatche
|
|||
return success;
|
||||
}
|
||||
|
||||
bool llama_kv_cache_recurrent::update(llama_context & lctx) {
|
||||
GGML_UNUSED(lctx);
|
||||
// noop
|
||||
return false;
|
||||
}
|
||||
|
||||
void llama_kv_cache_recurrent::defrag_sched(float thold) {
|
||||
GGML_UNUSED(thold);
|
||||
// noop
|
||||
}
|
||||
|
||||
bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) {
|
||||
const uint32_t n_tokens = ubatch.n_tokens;
|
||||
const uint32_t n_seqs = ubatch.n_seqs;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue