mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-18 23:49:46 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .devops/intel.Dockerfile # .github/workflows/build-cross.yml # .github/workflows/build-sycl.yml # .github/workflows/build.yml # .github/workflows/editorconfig.yml # .github/workflows/release.yml # cmake/riscv64-spacemit-linux-gnu-gcc.cmake # docs/backend/OPENVINO.md # docs/backend/SYCL.md # docs/build-riscv64-spacemit.md # docs/ops.md # docs/ops/WebGPU.csv # embd_res/ggml-vocab-qwen35.gguf # embd_res/ggml-vocab-qwen35.gguf.inp # embd_res/ggml-vocab-qwen35.gguf.out # examples/model-conversion/Makefile # ggml/CMakeLists.txt # ggml/src/ggml-cpu/CMakeLists.txt # ggml/src/ggml-hexagon/ggml-hexagon.cpp # ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c # ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c # ggml/src/ggml-hexagon/htp/hmx-utils.h # ggml/src/ggml-hexagon/htp/htp-ops.h # ggml/src/ggml-hexagon/htp/hvx-utils.h # ggml/src/ggml-hexagon/htp/main.c # ggml/src/ggml-hexagon/htp/unary-ops.c # ggml/src/ggml-opencl/CMakeLists.txt # ggml/src/ggml-opencl/ggml-opencl.cpp # ggml/src/ggml-opencl/kernels/cvt.cl # ggml/src/ggml-sycl/CMakeLists.txt # ggml/src/ggml-sycl/common.cpp # ggml/src/ggml-sycl/common.hpp # ggml/src/ggml-sycl/ggml-sycl.cpp # ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp # ggml/src/ggml-webgpu/ggml-webgpu.cpp # ggml/src/ggml-webgpu/wgsl-shaders/common_decls.tmpl # ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_tile.wgsl # ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_reduce.wgsl # ggml/src/ggml-webgpu/wgsl-shaders/flash_attn_vec_split.wgsl # ggml/src/ggml-webgpu/wgsl-shaders/get_rows.wgsl # ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_decls.tmpl # ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec_acc.tmpl # ggml/src/ggml-webgpu/wgsl-shaders/unary.wgsl # ggml/src/ggml-zendnn/CMakeLists.txt # ggml/src/ggml-zendnn/ggml-zendnn.cpp # scripts/snapdragon/adb/run-completion.sh # tests/CMakeLists.txt # tools/cli/README.md # tools/completion/README.md # tools/mtmd/clip-impl.h # tools/mtmd/clip.cpp # tools/mtmd/clip.h # tools/server/README.md
This commit is contained in:
commit
cc82c3164e
62 changed files with 18693 additions and 7034 deletions
|
|
@ -372,15 +372,29 @@ void common_init() {
|
|||
SetConsoleCP(CP_UTF8);
|
||||
#endif
|
||||
|
||||
llama_log_set(common_log_default_callback, NULL);
|
||||
common_log_set_prefix(common_log_main(), true);
|
||||
common_log_set_timestamps(common_log_main(), true);
|
||||
|
||||
llama_log_set(common_log_default_callback, NULL);
|
||||
}
|
||||
|
||||
void common_params_print_info(const common_params & params) {
|
||||
#ifdef NDEBUG
|
||||
const char * build_type = "";
|
||||
#else
|
||||
const char * build_type = " (debug)";
|
||||
#endif
|
||||
LOG_TRC("%s: build %d (%s) with %s for %s%s\n", __func__, llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type);
|
||||
|
||||
LOG_DBG("build: %d (%s) with %s for %s%s\n", llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type);
|
||||
LOG_INF("log_info: verbosity = %d (adjust with the `-lv N` CLI arg)\n", common_log_get_verbosity_thold());
|
||||
LOG_INF("device_info:\n");
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
||||
auto * dev = ggml_backend_dev_get(i);
|
||||
size_t free, total;
|
||||
ggml_backend_dev_memory(dev, &free, &total);
|
||||
LOG_INF(" - %-8s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
|
||||
}
|
||||
LOG_INF("%s\n", common_params_get_system_info(params).c_str());
|
||||
}
|
||||
|
||||
std::string common_params_get_system_info(const common_params & params) {
|
||||
|
|
@ -1153,7 +1167,8 @@ common_init_result::common_init_result(common_params & params) :
|
|||
auto cparams = common_context_params_to_llama(params);
|
||||
|
||||
if (params.fit_params) {
|
||||
LOG_INF("%s: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on\n", __func__);
|
||||
LOG_INF("%s: fitting params to device memory ...\n", __func__);
|
||||
LOG_INF("%s: (for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on)\n", __func__);
|
||||
common_fit_params(params.model.path.c_str(), &mparams, &cparams,
|
||||
params.tensor_split,
|
||||
params.tensor_buft_overrides.data(),
|
||||
|
|
@ -1202,7 +1217,7 @@ common_init_result::common_init_result(common_params & params) :
|
|||
// initialize once
|
||||
for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) {
|
||||
if (llama_vocab_is_eog(vocab, i)) {
|
||||
LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(vocab, i).c_str(), -INFINITY);
|
||||
LOG_TRC("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(vocab, i).c_str(), -INFINITY);
|
||||
params.sampling.logit_bias_eog.push_back({i, -INFINITY});
|
||||
}
|
||||
}
|
||||
|
|
@ -1215,12 +1230,12 @@ common_init_result::common_init_result(common_params & params) :
|
|||
}
|
||||
|
||||
//if (params.sampling.penalty_last_n == -1) {
|
||||
// LOG_INF("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
|
||||
// LOG_TRC("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
|
||||
// params.sampling.penalty_last_n = llama_n_ctx(lctx);
|
||||
//}
|
||||
|
||||
//if (params.sampling.dry_penalty_last_n == -1) {
|
||||
// LOG_INF("%s: setting dry_penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
|
||||
// LOG_TRC("%s: setting dry_penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
|
||||
// params.sampling.dry_penalty_last_n = llama_n_ctx(lctx);
|
||||
//}
|
||||
|
||||
|
|
@ -1428,7 +1443,7 @@ common_context_seq_rm_type common_context_can_seq_rm(llama_context * ctx) {
|
|||
|
||||
// try to remove the last tokens
|
||||
if (!llama_memory_seq_rm(mem, 0, 1, -1)) {
|
||||
LOG_WRN("%s: the context does not support partial sequence removal\n", __func__);
|
||||
LOG_TRC("%s: the context does not support partial sequence removal\n", __func__);
|
||||
res = COMMON_CONTEXT_SEQ_RM_TYPE_FULL;
|
||||
goto done;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue