From af6ae1efb27a9a7c3f7f7f84639d2243f7303ac1 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Sun, 30 Mar 2025 00:07:37 +0100 Subject: [PATCH 1/3] llama : fix non-causal mask for gemma 3 (#12615) --- src/llama-context.cpp | 4 +- src/llama-graph.cpp | 174 +++++++++++++++++------------------------- 2 files changed, 72 insertions(+), 106 deletions(-) diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 9467c3a01..3479a8cca 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -1317,8 +1317,8 @@ int llama_context::decode(llama_batch & inp_batch) { n_outputs = n_outputs_new; } - // non-causal masks do not use the KV cache - if (hparams.causal_attn) { + // find KV slot + { kv_self_update(); // if we have enough unused cells before the current head -> diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 0bd401744..cec203df4 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -402,120 +402,86 @@ void llm_graph_input_attn_no_cache::set_input(const llama_ubatch * ubatch) { void llm_graph_input_attn_kv_unified::set_input(const llama_ubatch * ubatch) { if (self_kq_mask || self_kq_mask_swa) { - // NOTE: hparams.causal_attn indicates the model is capable of generation and uses the kv cache. - if (cparams.causal_attn) { - const int64_t n_kv = kv_self->n; - const int64_t n_tokens = ubatch->n_tokens; - const int64_t n_seq_tokens = ubatch->n_seq_tokens; - const int64_t n_seqs = ubatch->n_seqs; + const int64_t n_kv = kv_self->n; + const int64_t n_tokens = ubatch->n_tokens; + const int64_t n_seq_tokens = ubatch->n_seq_tokens; + const int64_t n_seqs = ubatch->n_seqs; - float * data = nullptr; - float * data_swa = nullptr; - - if (self_kq_mask) { - GGML_ASSERT(ggml_backend_buffer_is_host(self_kq_mask->buffer)); - data = (float *) self_kq_mask->data; - } - - if (self_kq_mask_swa) { - GGML_ASSERT(ggml_backend_buffer_is_host(self_kq_mask_swa->buffer)); - data_swa = (float *) self_kq_mask_swa->data; - } - - // For causal attention, use only the previous KV cells - // of the correct sequence for each token of the ubatch. - // It's assumed that if a token in the batch has multiple sequences, they are equivalent. 
- for (int h = 0; h < 1; ++h) { - for (int s = 0; s < n_seqs; ++s) { - const llama_seq_id seq_id = ubatch->seq_id[s][0]; - - for (int j = 0; j < n_seq_tokens; ++j) { - const llama_pos pos = ubatch->pos[s*n_seq_tokens + j]; - - for (int i = 0; i < n_kv; ++i) { - float f; - if (!kv_self->cells[i].has_seq_id(seq_id) || kv_self->cells[i].pos > pos) { - f = -INFINITY; - } else { - if (hparams.use_alibi) { - f = -std::abs(kv_self->cells[i].pos - pos); - } else { - f = 0.0f; - } - } - - if (data) { - data[h*(n_kv*n_tokens) + s*(n_kv*n_seq_tokens) + j*n_kv + i] = f; - } - - // may need to cut off old tokens for sliding window - if (data_swa) { - if (pos - kv_self->cells[i].pos >= (int32_t)hparams.n_swa) { - f = -INFINITY; - } - data_swa[h*(n_kv*n_tokens) + s*(n_kv*n_seq_tokens) + j*n_kv + i] = f; - } - } - } - } - - if (data) { - for (int i = n_tokens; i < GGML_PAD(n_tokens, GGML_KQ_MASK_PAD); ++i) { - for (int j = 0; j < n_kv; ++j) { - data[h*(n_kv*n_tokens) + i*n_kv + j] = -INFINITY; - } - } - } - - if (data_swa) { - for (int i = n_tokens; i < GGML_PAD(n_tokens, GGML_KQ_MASK_PAD); ++i) { - for (int j = 0; j < n_kv; ++j) { - data_swa[h*(n_kv*n_tokens) + i*n_kv + j] = -INFINITY; - } - } - } - } - } else { - const int64_t n_tokens = ubatch->n_tokens; - const int64_t n_seq_tokens = ubatch->n_seq_tokens; - const int64_t n_seqs = ubatch->n_seqs; - // when using kv cache, the mask needs to match the kv cache size - const int64_t n_stride = n_tokens; + float * data = nullptr; + float * data_swa = nullptr; + if (self_kq_mask) { GGML_ASSERT(ggml_backend_buffer_is_host(self_kq_mask->buffer)); + data = (float *) self_kq_mask->data; + } - float * data = (float *) self_kq_mask->data; + if (self_kq_mask_swa) { + GGML_ASSERT(ggml_backend_buffer_is_host(self_kq_mask_swa->buffer)); + data_swa = (float *) self_kq_mask_swa->data; + } - for (int h = 0; h < 1; ++h) { - for (int s1 = 0; s1 < n_seqs; ++s1) { - const llama_seq_id seq_id = ubatch->seq_id[s1][0]; + // Use only the previous KV cells of the correct sequence for each token of the ubatch. + // It's assumed that if a token in the batch has multiple sequences, they are equivalent. 
+ // Example with a cache of 10 tokens, 2 tokens populated in cache and 3 tokens in batch: + // Causal mask: + // xxx------- + // xxxx------ + // xxxxx----- + // Non-causal mask: + // xxxxx----- + // xxxxx----- + // xxxxx----- + // To visualize the mask, see https://github.com/ggml-org/llama.cpp/pull/12615 + for (int h = 0; h < 1; ++h) { + for (int s = 0; s < n_seqs; ++s) { + const llama_seq_id seq_id = ubatch->seq_id[s][0]; - for (int j = 0; j < n_seq_tokens; ++j) { - const int32_t tj = s1*n_seq_tokens + j; - - for (int s0 = 0; s0 < n_seqs; ++s0) { - for (int i = 0; i < n_seq_tokens; ++i) { - const int32_t ti = s0*n_seq_tokens + i; - float f = -INFINITY; - - for (int s = 0; s < ubatch->n_seq_id[s0]; ++s) { - if (ubatch->seq_id[s0][s] == seq_id) { - if (hparams.use_alibi) { - f = -std::abs(ubatch->pos[ti] - ubatch->pos[tj]); - } else { - f = 0.0f; - } - break; - } - } - - data[h*(n_tokens*n_tokens) + tj*n_stride + ti] = f; + for (int j = 0; j < n_seq_tokens; ++j) { + const llama_pos pos = ubatch->pos[s*n_seq_tokens + j]; + for (int i = 0; i < n_kv; ++i) { + float f; + // mask the token if: + if (!kv_self->cells[i].has_seq_id(seq_id) // not the correct sequence + || (cparams.causal_attn && kv_self->cells[i].pos > pos) // for causal, mask future tokens + ) { + f = -INFINITY; + } else { + if (hparams.use_alibi) { + f = -std::abs(kv_self->cells[i].pos - pos); + } else { + f = 0.0f; } } - for (int i = n_tokens; i < n_stride; ++i) { - data[h*(n_tokens*n_tokens) + tj*n_stride + i] = -INFINITY; + if (data) { + data[h*(n_kv*n_tokens) + s*(n_kv*n_seq_tokens) + j*n_kv + i] = f; } + + // may need to cut off old tokens for sliding window + if (data_swa) { + if (pos - kv_self->cells[i].pos >= (int32_t)hparams.n_swa) { + f = -INFINITY; + } + data_swa[h*(n_kv*n_tokens) + s*(n_kv*n_seq_tokens) + j*n_kv + i] = f; + } + } + } + } + + // mask padded tokens + if (data) { + for (int i = n_tokens; i < GGML_PAD(n_tokens, GGML_KQ_MASK_PAD); ++i) { + for (int j = 0; j < n_kv; ++j) { + data[h*(n_kv*n_tokens) + i*n_kv + j] = -INFINITY; + } + } + } + + // mask padded tokens + if (data_swa) { + for (int i = n_tokens; i < GGML_PAD(n_tokens, GGML_KQ_MASK_PAD); ++i) { + for (int j = 0; j < n_kv; ++j) { + data_swa[h*(n_kv*n_tokens) + i*n_kv + j] = -INFINITY; } } } From 3891e183c61c1e25c2c61afe68d7b95019ea3f89 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 20 Mar 2025 07:02:18 +0100 Subject: [PATCH 2/3] examples : command.wasm updates (whisper/2904) This commit updates the command.wasm example by adding a server.py script to make it easy to start a local http server to try out the example, updates the build instructions, and also addresses some of the compiler warnings that were being generated. * emscripten : fix TOTAL_STACK for wasm This commit moves the TOTAL_STACK setting from the compile flags to the linker flags. This is because the TOTAL_STACK setting is a linker setting. 
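In CMake terms the fix looks roughly like the following sketch (the target name and stack size here are hypothetical, not taken from this patch):

```cmake
# Hypothetical sketch: move the Emscripten stack-size setting to link time.
# Before: passed as a compile flag, which em++ warns about and ignores:
#   target_compile_options(command.wasm PRIVATE -sTOTAL_STACK=5242880)
# After: passed as a linker flag, where the setting actually applies:
target_link_options(command.wasm PRIVATE -sTOTAL_STACK=5242880)
```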
The motivation for this change is that currently the following warnings
are generated when building:

```console
em++: warning: linker setting ignored during compilation: 'TOTAL_STACK' [-Wunused-command-line-argument]
em++: warning: linker setting ignored during compilation: 'TOTAL_STACK' [-Wunused-command-line-argument]
em++: warning: linker setting ignored during compilation: 'TOTAL_STACK' [-Wunused-command-line-argument]
em++: warning: linker setting ignored during compilation: 'TOTAL_STACK' [-Wunused-command-line-argument]
em++: warning: linker setting ignored during compilation: 'TOTAL_STACK' [-Wunused-command-line-argument]
em++: warning: linker setting ignored during compilation: 'TOTAL_STACK' [-Wunused-command-line-argument]
```

* examples : suppress C++17 deprecation warning for std::codecvt_utf8

This commit suppresses the C++17 deprecation warning for
std::codecvt_utf8, similar to what is done in
examples/talk-llama/unicode.cpp.

The motivation for this change is to suppress these warnings:

```console
/Users/danbev/work/ai/whisper-work/examples/common.cpp:251:31: warning: 'codecvt_utf8' is deprecated [-Wdeprecated-declarations]
  251 |     std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
      |                               ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/codecvt:193:28: note: 'codecvt_utf8' has been explicitly marked deprecated here
  193 | class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 codecvt_utf8 : public __codecvt_utf8<_Elem> {
      |                            ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/__config:723:41: note: expanded from macro '_LIBCPP_DEPRECATED_IN_CXX17'
  723 | # define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED
      |                                      ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/__config:688:49: note: expanded from macro '_LIBCPP_DEPRECATED'
  688 | # define _LIBCPP_DEPRECATED __attribute__((__deprecated__))
      |                                            ^
/Users/danbev/work/ai/whisper-work/examples/common.cpp:251:10: warning: 'wstring_convert<std::codecvt_utf8<wchar_t>>' is deprecated [-Wdeprecated-declarations]
  251 |     std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
      |          ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/locale:3145:28: note: 'wstring_convert<std::codecvt_utf8<wchar_t>>' has been explicitly marked deprecated here
 3145 | class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 wstring_convert {
      |                            ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/__config:723:41: note: expanded from macro '_LIBCPP_DEPRECATED_IN_CXX17'
  723 | # define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED
      |                                      ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/__config:688:49: note: expanded from macro '_LIBCPP_DEPRECATED'
  688 | # define _LIBCPP_DEPRECATED __attribute__((__deprecated__))
      |                                            ^
/Users/danbev/work/ai/whisper-work/examples/common.cpp:257:31: warning: 'codecvt_utf8' is deprecated [-Wdeprecated-declarations]
  257 |     std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
      |                               ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/codecvt:193:28: note: 'codecvt_utf8' has been explicitly marked deprecated here
  193 | class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 codecvt_utf8 : public __codecvt_utf8<_Elem> {
      |                            ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/__config:723:41: note: expanded from macro '_LIBCPP_DEPRECATED_IN_CXX17'
  723 | # define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED
      |                                      ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/__config:688:49: note: expanded from macro '_LIBCPP_DEPRECATED'
  688 | # define _LIBCPP_DEPRECATED __attribute__((__deprecated__))
      |                                            ^
/Users/danbev/work/ai/whisper-work/examples/common.cpp:257:10: warning: 'wstring_convert<std::codecvt_utf8<wchar_t>>' is deprecated [-Wdeprecated-declarations]
  257 |     std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
      |          ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/locale:3145:28: note: 'wstring_convert<std::codecvt_utf8<wchar_t>>' has been explicitly marked deprecated here
 3145 | class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 wstring_convert {
      |                            ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/__config:723:41: note: expanded from macro '_LIBCPP_DEPRECATED_IN_CXX17'
  723 | # define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED
      |                                      ^
/Users/danbev/work/wasm/emsdk/upstream/emscripten/cache/sysroot/include/c++/v1/__config:688:49: note: expanded from macro '_LIBCPP_DEPRECATED'
  688 | # define _LIBCPP_DEPRECATED __attribute__((__deprecated__))
      |                                            ^
4 warnings generated.
```

* ggml : suppress double-promotion warning in GGML_F16x4_REDUCE

This commit adds a cast to `ggml_float` in the `GGML_F16x4_REDUCE` macro
to suppress a double-promotion warning.

Currently the following warning is generated when compiling the
command.wasm example:

```console
/whisper-work/src/ggml-cpu/ggml-cpu.c:1592:5: warning: implicit conversion increases floating-point precision: 'float' to 'ggml_float' (aka 'double') [-Wdouble-promotion]
 1592 |     GGML_F16_VEC_REDUCE(sumf, sum);
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/Users/danbev/work/ai/whisper-work/src/ggml-cpu/ggml-cpu.c:932:37: note: expanded from macro 'GGML_F16_VEC_REDUCE'
  932 | #define GGML_F16_VEC_REDUCE GGML_F16x4_REDUCE
      |                             ^
/Users/danbev/work/ai/whisper-work/src/ggml-cpu/ggml-cpu.c:920:44: note: expanded from macro 'GGML_F16x4_REDUCE'
  918 |     res = wasm_f32x4_extract_lane(x[0], 0) + \
      |         ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  919 |           wasm_f32x4_extract_lane(x[0], 1) + \
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  920 |           wasm_f32x4_extract_lane(x[0], 2) + \
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~
  921 |           wasm_f32x4_extract_lane(x[0], 3); \
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/whisper-work/src/ggml-cpu/ggml-cpu.c:1640:9: warning: implicit conversion increases floating-point precision: 'float' to 'ggml_float' (aka 'double') [-Wdouble-promotion]
 1640 |         GGML_F16_VEC_REDUCE(sumf[k], sum[k]);
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/Users/danbev/work/ai/whisper-work/src/ggml-cpu/ggml-cpu.c:932:37: note: expanded from macro 'GGML_F16_VEC_REDUCE'
  932 | #define GGML_F16_VEC_REDUCE GGML_F16x4_REDUCE
      |                             ^
/Users/danbev/work/ai/whisper-work/src/ggml-cpu/ggml-cpu.c:920:44: note: expanded from macro 'GGML_F16x4_REDUCE'
  918 |     res = wasm_f32x4_extract_lane(x[0], 0) + \
      |         ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  919 |           wasm_f32x4_extract_lane(x[0], 1) + \
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  920 |           wasm_f32x4_extract_lane(x[0], 2) + \
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~
  921 |           wasm_f32x4_extract_lane(x[0], 3); \
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 warnings generated.
```

wasm_f32x4_extract_lane returns a 32-bit float and this is what the
addition is performed on. But there is an implicit conversion from
32-bit float to 64-bit double when the result is assigned to `res`,
which is of type `ggml_float`. My understanding here is that this is
intentional and adding a cast to `ggml_float` should suppress the
warning.

* emscripten : add -Wno-deprecated for emscripten

This commit adds -Wno-deprecated to the CMAKE_CXX_FLAGS for emscripten
builds.
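A rough sketch of what that change could look like (the exact form and location in the build files are assumed, not quoted from the patch):

```cmake
if (EMSCRIPTEN)
    # Suppress the deprecation warnings emitted for the generated
    # JS library glue when targeting wasm
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated")
endif()
```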
The motivation for this is that currently there are a number of warnings
generated like the following:

```console
warning: JS library symbol '$print' is deprecated. Please open a bug if you have a continuing need for this symbol [-Wdeprecated]
warning: JS library symbol '$printErr' is deprecated. Please open a bug if you have a continuing need for this symbol [-Wdeprecated]
em++: warning: warnings in JS library compilation [-Wjs-compiler]
em++: warning: linker setting ignored during compilation: 'ENVIRONMENT' [-Wunused-command-line-argument]
warning: JS library symbol '$print' is deprecated. Please open a bug if you have a continuing need for this symbol [-Wdeprecated]
warning: JS library symbol '$printErr' is deprecated. Please open a bug if you have a continuing need for this symbol [-Wdeprecated]
em++: warning: warnings in JS library compilation [-Wjs-compiler]
warning: JS library symbol '$print' is deprecated. Please open a bug if you have a continuing need for this symbol [-Wdeprecated]
warning: JS library symbol '$printErr' is deprecated. Please open a bug if you have a continuing need for this symbol [-Wdeprecated]
em++: warning: warnings in JS library compilation [-Wjs-compiler]
em++: warning: linker setting ignored during compilation: 'ENVIRONMENT' [-Wunused-command-line-argument]
em++: warning: linker setting ignored during compilation: 'ENVIRONMENT' [-Wunused-command-line-argument]
```

The downside of this is that we might miss other deprecation warnings in
the future, so I'm not sure if this is acceptable. But it makes the wasm
examples cleaner without the warnings.

* examples : fix tautological-compare warning in stb_vorbis.c [no ci]

This commit applies a fix to address a tautological-compare warning in
stb_vorbis.c.

The motivation for this is that currently the following warning is
generated when compiling the command.wasm example:

```console
/Users/danbev/work/ai/whisper-work/examples/stb_vorbis.c:1404:75: warning: pointer comparison always evaluates to false [-Wtautological-compare]
 1404 |    if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
      |                                                                          ^
1 warning generated.
```

This fix was taken from an open pull request on the stb repository that
addresses this issue: https://github.com/nothings/stb/pull/1746

* squash! examples : update command.wasm instructions [no ci]

This commit adds a Python script to serve the wasm examples built in the
`build-em` directory. Initially I thought that it would be enough to
start a simple Python server, but I did not notice that there was an
error in the browser console when I did that:

```console
command.js:1 Uncaught (in promise) DataCloneError: Failed to execute 'postMessage' on 'Worker': SharedArrayBuffer transfer requires self.crossOriginIsolated.
    at command.js:1:1206224
    at new Promise (<anonymous>)
    at loadWasmModuleToWorker (command.js:1:1204981)
    at Array.map (<anonymous>)
    at Object.loadWasmModuleToAllWorkers (command.js:1:1206428)
    at command.js:1:1204318
    at callRuntimeCallbacks (command.js:1:1202062)
    at preRun (command.js:1:6136)
    at run (command.js:1:1294094)
    at removeRunDependency (command.js:1:7046)
```

We need a few CORS headers to be set, and in order to hopefully make
this easy for users a Python script is added to the examples directory
(a minimal sketch of such a server is shown after these notes). This
should be able to serve all the wasm examples, provided they have been
built. command.wasm's README.md is updated to reflect this change.

* examples : remove unused functions

This commit removes the unused functions convert_to_utf8 and
convert_to_wstring from examples/common.cpp.
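The server script itself is not reproduced in this patch text, but as a minimal sketch (file name, port, and served directory are illustrative), the key point is sending the two cross-origin isolation headers that the DataCloneError above asks for:

```python
# Minimal static file server with the COOP/COEP headers required for
# SharedArrayBuffer (self.crossOriginIsolated) -- illustrative sketch only.
from http.server import HTTPServer, SimpleHTTPRequestHandler

class CoopCoepHandler(SimpleHTTPRequestHandler):
    def end_headers(self):
        # These two headers make the served page cross-origin isolated
        self.send_header("Cross-Origin-Opener-Policy", "same-origin")
        self.send_header("Cross-Origin-Embedder-Policy", "require-corp")
        super().end_headers()

if __name__ == "__main__":
    # Run from the build output directory, e.g. build-em/bin
    HTTPServer(("localhost", 8000), CoopCoepHandler).serve_forever()
```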
* Revert "examples : fix tautological-compare warning in stb_vorbis.c [no ci]" This reverts commit 8e3c47d96141c7675c985562ebdc705e839e338a. We should not make this change here and instead when the upstream PR is merged we can sync with it. Refs: https://github.com/ggerganov/whisper.cpp/issues/2784 --- ggml/src/ggml-cpu/ggml-cpu.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index fde837aed..d2c5feec4 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -901,24 +901,24 @@ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) { #define GGML_F16x4_FMA GGML_F32x4_FMA #define GGML_F16x4_ADD wasm_f32x4_add #define GGML_F16x4_MUL wasm_f32x4_mul -#define GGML_F16x4_REDUCE(res, x) \ -{ \ - int offset = GGML_F16_ARR >> 1; \ - for (int i = 0; i < offset; ++i) { \ - x[i] = wasm_f32x4_add(x[i], x[offset+i]); \ - } \ - offset >>= 1; \ - for (int i = 0; i < offset; ++i) { \ - x[i] = wasm_f32x4_add(x[i], x[offset+i]); \ - } \ - offset >>= 1; \ - for (int i = 0; i < offset; ++i) { \ - x[i] = wasm_f32x4_add(x[i], x[offset+i]); \ - } \ - res = wasm_f32x4_extract_lane(x[0], 0) + \ - wasm_f32x4_extract_lane(x[0], 1) + \ - wasm_f32x4_extract_lane(x[0], 2) + \ - wasm_f32x4_extract_lane(x[0], 3); \ +#define GGML_F16x4_REDUCE(res, x) \ +{ \ + int offset = GGML_F16_ARR >> 1; \ + for (int i = 0; i < offset; ++i) { \ + x[i] = wasm_f32x4_add(x[i], x[offset+i]); \ + } \ + offset >>= 1; \ + for (int i = 0; i < offset; ++i) { \ + x[i] = wasm_f32x4_add(x[i], x[offset+i]); \ + } \ + offset >>= 1; \ + for (int i = 0; i < offset; ++i) { \ + x[i] = wasm_f32x4_add(x[i], x[offset+i]); \ + } \ + res = (ggml_float) (wasm_f32x4_extract_lane(x[0], 0) + \ + wasm_f32x4_extract_lane(x[0], 1) + \ + wasm_f32x4_extract_lane(x[0], 2) + \ + wasm_f32x4_extract_lane(x[0], 3)); \ } #define GGML_F16_VEC GGML_F16x4 From e408d4351a4ad143656672479d8ae1479306ce31 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Mon, 24 Mar 2025 09:53:38 +0100 Subject: [PATCH 3/3] ggml : add logging for native build options/vars (whisper/2935) This commit adds debug level logging for the native build options and variables to ggml/CMakeLists.txt. The motivation for this is that it can be useful to see the effective result of `GGML_NATIVE`, `GGML_NATIVE_DEFAULT`, and `INS_ENB` for a cmake build. I've found myself adding similar logging a few times now, so I thought it might be a good idea to add this. Example output, specifying `-DCMAKE_MESSAGE_LOG_LEVEL=DEBUG` when running cmake produces the following output: ```console -- GGML_NATIVE : OFF -- GGML_NATIVE_DEFAULT : OFF -- INS_ENB : OFF ``` --- ggml/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 0332c26f1..d33f843b4 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -100,6 +100,10 @@ else() set(INS_ENB ON) endif() +message(DEBUG "GGML_NATIVE : ${GGML_NATIVE}") +message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}") +message(DEBUG "INS_ENB : ${INS_ENB}") + option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF) option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON) option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)