Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	docs/build.md
#	examples/batched/batched.cpp
#	examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
#	examples/deprecation-warning/deprecation-warning.cpp
#	examples/eval-callback/eval-callback.cpp
#	examples/gen-docs/gen-docs.cpp
#	examples/gguf-hash/gguf-hash.cpp
#	examples/gguf/gguf.cpp
#	examples/lookahead/lookahead.cpp
#	examples/lookup/lookup-create.cpp
#	examples/lookup/lookup-merge.cpp
#	examples/lookup/lookup-stats.cpp
#	examples/lookup/lookup.cpp
#	examples/parallel/parallel.cpp
#	examples/passkey/passkey.cpp
#	examples/retrieval/retrieval.cpp
#	examples/save-load-state/save-load-state.cpp
#	examples/simple-chat/simple-chat.cpp
#	examples/simple/simple.cpp
#	examples/speculative-simple/speculative-simple.cpp
#	examples/speculative/speculative.cpp
#	examples/sycl/ls-sycl-device.cpp
#	examples/training/finetune.cpp
#	ggml/src/ggml-cpu/CMakeLists.txt
#	ggml/src/ggml-cpu/amx/common.h
#	ggml/src/ggml-cpu/kleidiai/kernels.cpp
#	ggml/src/ggml-opencl/CMakeLists.txt
#	ggml/src/ggml-opencl/ggml-opencl.cpp
#	ggml/src/ggml-opencl/kernels/cvt.cl
#	ggml/src/ggml-opencl/kernels/gemv_noshuffle_general_q8_0_f32.cl
#	ggml/src/ggml-opencl/kernels/transpose.cl
#	ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp
#	ggml/src/ggml-webgpu/ggml-webgpu.cpp
#	ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl
#	ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_reg_tile.wgsl
#	ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_subgroup_matrix.wgsl
#	scripts/get-wikitext-2.sh
#	tests/test-backend-ops.cpp
#	tools/batched-bench/batched-bench.cpp
#	tools/cvector-generator/cvector-generator.cpp
#	tools/export-lora/export-lora.cpp
#	tools/imatrix/imatrix.cpp
#	tools/llama-bench/llama-bench.cpp
#	tools/perplexity/perplexity.cpp
#	tools/rpc/rpc-server.cpp
#	tools/tokenize/tokenize.cpp
This commit is contained in:
Concedo 2026-03-06 21:19:49 +08:00
commit d20e60ddd5
13 changed files with 96 additions and 22 deletions

View file

@ -7,6 +7,7 @@
#include "chat.h"
#include "build-info.h"
#include <clocale>
#include <cstdio>
#include <cstring>
#include <ctime>
@ -85,6 +86,8 @@ static void sigint_handler(int signo) {
#endif
int main(int argc, char ** argv) {
std::setlocale(LC_NUMERIC, "C");
common_params params;
g_params = &params;
@ -377,7 +380,7 @@ int main(int argc, char ** argv) {
// remove any "future" tokens that we might have inherited from the previous session
if (session_tokens.size() > n_match) {
if (!llama_memory_seq_rm(mem, -1, n_match, -1)) {
LOG_WRN("%s: unable to resuse common prefix (for example, when the memory is recurrent)\n", __func__);
LOG_WRN("%s: unable to reuse common prefix (for example, when the memory is recurrent)\n", __func__);
llama_memory_clear(mem, true);
session_tokens.clear();
n_match = 0;