Merge commit '00131d6eaf' into concedo_experimental

# Conflicts:
#	docs/ops.md
#	examples/save-load-state/save-load-state.cpp
#	ggml/CMakeLists.txt
#	ggml/src/ggml-cann/aclnn_ops.cpp
#	ggml/src/ggml-cann/aclnn_ops.h
#	ggml/src/ggml-cann/ggml-cann.cpp
#	ggml/src/ggml-hip/CMakeLists.txt
#	ggml/src/ggml-sycl/cpy.cpp
#	ggml/src/ggml-sycl/cpy.hpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-sycl/set_rows.cpp
#	scripts/server-bench.py
#	tests/CMakeLists.txt
#	tests/test-backend-ops.cpp
#	tests/test-thread-safety.cpp
#	tools/llama-bench/llama-bench.cpp
This commit is contained in:
Concedo 2025-08-02 10:15:39 +08:00
commit b04362f831
34 changed files with 17144 additions and 4130 deletions

View file

@ -81,6 +81,14 @@ int main(int argc, char ** argv) {
params.embedding = true;
// if the number of prompts that would be encoded is known in advance, it's more efficient to specify the
// --parallel argument accordingly. for convenience, if not specified, we fallback to unified KV cache
// in order to support any number of prompts
if (params.n_parallel == 1) {
LOG_INF("%s: n_parallel == 1 -> unified KV cache is enabled\n", __func__);
params.kv_unified = true;
}
// utilize the full context
if (params.n_batch < params.n_ctx) {
LOG_WRN("%s: setting batch size to %d\n", __func__, params.n_ctx);