From c32fa21db8a631e9127e55f69a3d2bdaa9f71824 Mon Sep 17 00:00:00 2001 From: Jay Zenith <162098309+JayZenith@users.noreply.github.com> Date: Tue, 30 Dec 2025 06:27:49 -0800 Subject: [PATCH 01/18] sampling: reuse token data buffer in llama_sampler_sample (#18365) * sampling: reuse token data buffer in llama_sampler_sample * move cur buffer before timing section, after samplers * minor : fix build --------- Co-authored-by: Georgi Gerganov --- src/llama-sampling.cpp | 77 ++++++++++++++++++++++++------------------ src/llama-sampling.h | 3 ++ 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index d96f619ae..f3891453e 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -421,39 +421,6 @@ void llama_sampler_free(struct llama_sampler * smpl) { delete smpl; } -llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_context * ctx, int32_t idx) { - const auto * logits = llama_get_logits_ith(ctx, idx); - - const llama_model * model = llama_get_model(ctx); - const llama_vocab * vocab = llama_model_get_vocab(model); - - const int n_vocab = llama_vocab_n_tokens(vocab); - - // TODO: do not allocate each time - std::vector cur; - cur.reserve(n_vocab); - for (llama_token token_id = 0; token_id < n_vocab; token_id++) { - cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f}); - } - - llama_token_data_array cur_p = { - /* .data = */ cur.data(), - /* .size = */ cur.size(), - /* .selected = */ -1, - /* .sorted = */ false, - }; - - llama_sampler_apply(smpl, &cur_p); - - GGML_ASSERT(cur_p.selected >= 0 && cur_p.selected < (int32_t) cur_p.size); - - auto token = cur_p.data[cur_p.selected].id; - - llama_sampler_accept(smpl, token); - - return token; -} - // sampler chain static const char * llama_sampler_chain_name(const struct llama_sampler * /*smpl*/) { @@ -527,12 +494,56 @@ struct llama_sampler * llama_sampler_chain_init(struct llama_sampler_chain_param /* .ctx = */ new 
llama_sampler_chain { /* .params = */ params, /* .samplers = */ {}, + /* .cur = */ {}, /* .t_sample_us = */ 0, /* .n_sample = */ 0, } ); } +llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_context * ctx, int32_t idx) { + const auto * logits = llama_get_logits_ith(ctx, idx); + + const llama_model * model = llama_get_model(ctx); + const llama_vocab * vocab = llama_model_get_vocab(model); + + const int n_vocab = llama_vocab_n_tokens(vocab); + + // use pre-allocated buffer from chain if available, otherwise allocate locally + std::vector * cur_ptr; + std::vector cur_local; + + if (smpl->iface == &llama_sampler_chain_i) { + auto * chain = (llama_sampler_chain *) smpl->ctx; + cur_ptr = &chain->cur; + } else { + cur_ptr = &cur_local; + } + + auto & cur = *cur_ptr; + cur.resize(n_vocab); + for (llama_token token_id = 0; token_id < n_vocab; token_id++) { + cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f}; + } + + llama_token_data_array cur_p = { + /* .data = */ cur.data(), + /* .size = */ cur.size(), + /* .selected = */ -1, + /* .sorted = */ false, + }; + + llama_sampler_apply(smpl, &cur_p); + + GGML_ASSERT(cur_p.selected >= 0 && cur_p.selected < (int32_t) cur_p.size); + + auto token = cur_p.data[cur_p.selected].id; + + llama_sampler_accept(smpl, token); + + return token; +} + void llama_sampler_chain_add(struct llama_sampler * chain, struct llama_sampler * smpl) { auto * p = (llama_sampler_chain *) chain->ctx; p->samplers.push_back(smpl); diff --git a/src/llama-sampling.h b/src/llama-sampling.h index 759dd7dcb..1e3de4e2e 100644 --- a/src/llama-sampling.h +++ b/src/llama-sampling.h @@ -16,6 +16,9 @@ struct llama_sampler_chain { std::vector samplers; + // pre-allocated buffer for llama_sampler_sample to avoid repeated allocations + std::vector cur; + // timing mutable int64_t t_sample_us; From cd78e57c3aeae7b56c5843f94e0e0b83a3d8ca81 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Tue, 30 Dec 2025 15:53:12 +0100 Subject: 
[PATCH 02/18] lora: count lora nodes in graph_max_nodes (#18469) * lora: count lora nodes in graph_max_nodes * 3 nodes per weight * 4 nodes * keep track n_lora_nodes from llama_model * fix assert * rm redundant header * common: load adapters before context creation * use 6 nodes --- common/common.cpp | 37 +++++++++++++++++++------------------ include/llama.h | 2 ++ src/llama-adapter.cpp | 15 ++++++++++++--- src/llama-adapter.h | 8 +++++++- src/llama-context.cpp | 4 +++- src/llama-model.h | 3 +++ 6 files changed, 46 insertions(+), 23 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 58fef5954..79c475612 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1109,6 +1109,25 @@ common_init_result::common_init_result(common_params & params) : const llama_vocab * vocab = llama_model_get_vocab(model); + // load and optionally apply lora adapters (must be loaded before context creation) + for (auto & la : params.lora_adapters) { + llama_adapter_lora_ptr lora; + lora.reset(llama_adapter_lora_init(model, la.path.c_str())); + if (lora == nullptr) { + LOG_ERR("%s: failed to load lora adapter '%s'\n", __func__, la.path.c_str()); + pimpl->model.reset(model); + return; + } + + char buf[1024]; + la.ptr = lora.get(); + llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf)); + la.task_name = buf; + llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf)); + la.prompt_prefix = buf; + pimpl->lora.emplace_back(std::move(lora)); // copy to list of loaded adapters + } + // updates params.sampling // TODO: fix naming common_init_sampler_from_model(model, params.sampling); @@ -1245,24 +1264,6 @@ common_init_result_ptr common_init_from_params(common_params & params) { } } - // load and optionally apply lora adapters - for (auto & la : params.lora_adapters) { - llama_adapter_lora_ptr lora; - lora.reset(llama_adapter_lora_init(model, la.path.c_str())); - if (lora == nullptr) { - LOG_ERR("%s: failed to apply lora 
adapter '%s'\n", __func__, la.path.c_str()); - return res; - } - - char buf[1024]; - la.ptr = lora.get(); - llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf)); - la.task_name = buf; - llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf)); - la.prompt_prefix = buf; - res->lora().emplace_back(std::move(lora)); // copy to list of loaded adapters - } - if (!params.lora_init_without_apply) { common_set_adapter_lora(lctx, params.lora_adapters); } diff --git a/include/llama.h b/include/llama.h index 4f0124fdc..8b3c8a7b1 100644 --- a/include/llama.h +++ b/include/llama.h @@ -607,6 +607,8 @@ extern "C" { // // Load a LoRA adapter from file + // The adapter is valid as long as the associated model is not freed + // All adapters must be loaded before context creation LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init( struct llama_model * model, const char * path_lora); diff --git a/src/llama-adapter.cpp b/src/llama-adapter.cpp index d8eef75a7..bdc24c2d6 100644 --- a/src/llama-adapter.cpp +++ b/src/llama-adapter.cpp @@ -146,9 +146,11 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) { return nullptr; } -static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) { +static void llama_adapter_lora_init_impl(const char * path_lora, llama_adapter_lora & adapter) { LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora); + llama_model & model = adapter.model; + ggml_context * ctx_init; gguf_init_params meta_gguf_params = { /* .no_alloc = */ true, @@ -411,14 +413,17 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_ } } + // update number of nodes used + model.n_lora_nodes += adapter.get_n_nodes(); + LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2); } llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * 
path_lora) { - llama_adapter_lora * adapter = new llama_adapter_lora(); + llama_adapter_lora * adapter = new llama_adapter_lora(*model); try { - llama_adapter_lora_init_impl(*model, path_lora, *adapter); + llama_adapter_lora_init_impl(path_lora, *adapter); return adapter; } catch (const std::exception & err) { LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what()); @@ -469,6 +474,10 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter, } void llama_adapter_lora_free(llama_adapter_lora * adapter) { + // update number of nodes used + GGML_ASSERT(adapter->model.n_lora_nodes >= adapter->get_n_nodes()); + adapter->model.n_lora_nodes -= adapter->get_n_nodes(); + delete adapter; } diff --git a/src/llama-adapter.h b/src/llama-adapter.h index 4f65247c0..42d64a6e0 100644 --- a/src/llama-adapter.h +++ b/src/llama-adapter.h @@ -59,6 +59,8 @@ struct llama_adapter_lora_weight { }; struct llama_adapter_lora { + llama_model & model; + // map tensor name to lora_a_b std::unordered_map ab_map; @@ -73,10 +75,14 @@ struct llama_adapter_lora { // activated lora (aLoRA) std::vector alora_invocation_tokens; - llama_adapter_lora() = default; + llama_adapter_lora(llama_model & model) : model(model) {} ~llama_adapter_lora() = default; llama_adapter_lora_weight * get_weight(ggml_tensor * w); + + uint32_t get_n_nodes() const { + return ab_map.size() * 6u; // a, b, scale, add, 2 x mul_mat + } }; using llama_adapter_loras = std::unordered_map; diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 1c530fdc9..34dfcd472 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -1442,7 +1442,9 @@ uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const { if (model.arch == LLM_ARCH_QWEN3NEXT) { return std::max(n_tokens * 40, 32u * model.n_tensors()); } - return std::max(1024u, 8u*model.n_tensors()); + uint32_t res = std::max(1024u, 8u*model.n_tensors()); + res += model.n_lora_nodes; + return res; } llm_graph_result * 
llama_context::get_gf_res_reserve() const { diff --git a/src/llama-model.h b/src/llama-model.h index dbe5edc15..f4f44a92b 100644 --- a/src/llama-model.h +++ b/src/llama-model.h @@ -475,6 +475,9 @@ struct llama_model { // for quantize-stats only std::vector> tensors_by_name; + // for keeping track of extra nodes used by lora adapters + uint32_t n_lora_nodes = 0; + int64_t t_load_us = 0; int64_t t_start_us = 0; From ac1d0eb7bf8c59b81a2cceb4a8dac1f44d201a3f Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Tue, 30 Dec 2025 17:20:14 +0100 Subject: [PATCH 03/18] llama : fix typo in comment in llama-kv-cache.h [no ci] (#18489) --- src/llama-kv-cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama-kv-cache.h b/src/llama-kv-cache.h index 1868f1185..0c4ed6484 100644 --- a/src/llama-kv-cache.h +++ b/src/llama-kv-cache.h @@ -305,7 +305,7 @@ public: bool do_shift, stream_copy_info sc_info); - // used to create a batch procesing context from a batch + // used to create a batch processing context from a batch llama_kv_cache_context( llama_kv_cache * kv, slot_info_vec_t sinfos, From 0f89d2ecf14270f45f43c442e90ae433fd82dab1 Mon Sep 17 00:00:00 2001 From: Aldehir Rojas Date: Tue, 30 Dec 2025 12:00:57 -0600 Subject: [PATCH 04/18] common : default content to an empty string (#18485) * common : default content to an empty string * common : fix tests that break when content != null --- common/chat.cpp | 2 +- models/templates/llama-cpp-deepseek-r1.jinja | 6 +++--- tests/test-chat.cpp | 8 +++++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 0a426f447..be44c8abb 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -319,7 +319,7 @@ json common_chat_msgs_to_json_oaicompat(const std::vector & msg } } } else { - jmsg["content"] = json(); // null + jmsg["content"] = ""; } if (!msg.reasoning_content.empty()) { jmsg["reasoning_content"] = msg.reasoning_content; diff --git 
a/models/templates/llama-cpp-deepseek-r1.jinja b/models/templates/llama-cpp-deepseek-r1.jinja index fcb1732eb..0d1887087 100644 --- a/models/templates/llama-cpp-deepseek-r1.jinja +++ b/models/templates/llama-cpp-deepseek-r1.jinja @@ -38,7 +38,7 @@ Example function tool call syntax: {%- if message['role'] == 'user' -%} {{- '<|User|>' + message['content'] + '<|end▁of▁sentence|>' -}} {%- endif -%} - {%- if message['role'] == 'assistant' and message['content'] is none -%} + {%- if message['role'] == 'assistant' and not message['content'] -%} {{- '<|Assistant|><|tool▁calls▁begin|>' -}} {%- set ns.is_first = true -%} {%- for tc in message['tool_calls'] -%} @@ -53,7 +53,7 @@ Example function tool call syntax: {%- endfor -%} {{- '<|tool▁calls▁end|><|end▁of▁sentence|>' -}} {%- endif -%} - {%- if message['role'] == 'assistant' and message['content'] is not none -%} + {%- if message['role'] == 'assistant' and message['content'] -%} {{- flush_tool_outputs() -}} {%- set content = message['content'] -%} {%- if '' in content -%} @@ -73,4 +73,4 @@ Example function tool call syntax: {{- flush_tool_outputs() -}} {%- if add_generation_prompt and not ns.is_tool_outputs -%} {{- '<|Assistant|>\n' -}} -{%- endif -%} \ No newline at end of file +{%- endif -%} diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 02af5251c..a78627604 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -650,7 +650,7 @@ static void test_msgs_oaicompat_json_conversion() { "[\n" " {\n" " \"role\": \"assistant\",\n" - " \"content\": null,\n" + " \"content\": \"\",\n" " \"tool_calls\": [\n" " {\n" " \"type\": \"function\",\n" @@ -906,7 +906,8 @@ static void test_template_output_parsers() { " },\n" " \"id\": \"123456789\"\n" " }\n" - " ]\n" + " ],\n" + " \"content\": \"\"\n" "}"); } { @@ -1713,7 +1714,8 @@ static void test_template_output_parsers() { " },\n" " \"id\": \"123456789\"\n" " }\n" - " ]\n" + " ],\n" + " \"content\": \"\"\n" "}", /* expect_grammar_triggered= */ false ); From 
6e0c8cbc40c4abf49e5c52f0f51267c2afdfc053 Mon Sep 17 00:00:00 2001 From: Bart Louwers Date: Tue, 30 Dec 2025 22:13:49 +0100 Subject: [PATCH 05/18] docs : document that JSON Schema is not available to model when using response_format (#18492) * Document unsupported JSON Schema annotations Add note about unsupported JSON Schema annotations. * Update README.md * Update README.md * Update README.md --- grammars/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/grammars/README.md b/grammars/README.md index daac7f4d8..dcd28648b 100644 --- a/grammars/README.md +++ b/grammars/README.md @@ -150,6 +150,9 @@ You can use GBNF grammars: - in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py) - in JavaScript with [json-schema-to-grammar.mjs](../tools/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../tools/server)'s Web UI) +> [!NOTE] +> The JSON schema is only used to constrain the model output and is not injected into the prompt. The model has no visibility into the schema, so if you want it to understand the expected structure, describe it explicitly in your prompt. This does not apply to tool calling, where schemas are injected into the prompt. + Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggml-org/llama.cpp/pull/5978, https://github.com/ggml-org/llama.cpp/pull/6659 & https://github.com/ggml-org/llama.cpp/pull/6555). 
```bash From 4849661d9898ac3caf59ddd62044185805084370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Tue, 30 Dec 2025 22:28:53 +0100 Subject: [PATCH 06/18] docker : add CUDA 13.1 image build (#18441) * add updated cuda-new.Dockerfile for Ubuntu 24.04 compatibility * add cuda13 build --- .devops/cuda-new.Dockerfile | 95 ++++++++++++++++++++++++++++++++++++ .github/workflows/docker.yml | 35 ++++++++----- 2 files changed, 119 insertions(+), 11 deletions(-) create mode 100644 .devops/cuda-new.Dockerfile diff --git a/.devops/cuda-new.Dockerfile b/.devops/cuda-new.Dockerfile new file mode 100644 index 000000000..62443e17f --- /dev/null +++ b/.devops/cuda-new.Dockerfile @@ -0,0 +1,95 @@ +ARG UBUNTU_VERSION=24.04 +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=13.1.0 +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + +ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} AS build + +# CUDA architecture to build for (defaults to all supported archs) +ARG CUDA_DOCKER_ARCH=default + +RUN apt-get update && \ + apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 + +WORKDIR /app + +COPY . . + +RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ + fi && \ + cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . 
&& \ + cmake --build build --config Release -j$(nproc) + +RUN mkdir -p /app/lib && \ + find build -name "*.so*" -exec cp -P {} /app/lib \; + +RUN mkdir -p /app/full \ + && cp build/bin/* /app/full \ + && cp *.py /app/full \ + && cp -r gguf-py /app/full \ + && cp -r requirements /app/full \ + && cp requirements.txt /app/full \ + && cp .devops/tools.sh /app/full/tools.sh + +## Base image +FROM ${BASE_CUDA_RUN_CONTAINER} AS base + +RUN apt-get update \ + && apt-get install -y libgomp1 curl\ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app + +### Full +FROM base AS full + +COPY --from=build /app/full /app + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y \ + git \ + python3 \ + python3-pip \ + python3-wheel \ + && pip install --break-system-packages --upgrade setuptools \ + && pip install --break-system-packages -r requirements.txt \ + && apt autoremove -y \ + && apt clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + + +ENTRYPOINT ["/app/tools.sh"] + +### Light, CLI only +FROM base AS light + +COPY --from=build /app/full/llama-cli /app/full/llama-completion /app + +WORKDIR /app + +ENTRYPOINT [ "/app/llama-cli" ] + +### Server, Server only +FROM base AS server + +ENV LLAMA_ARG_HOST=0.0.0.0 + +COPY --from=build /app/full/llama-server /app + +WORKDIR /app + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index bfd127071..d9fe0686d 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -40,7 +40,8 @@ jobs: # https://github.com/ggml-org/llama.cpp/issues/11888 #- { tag: "cpu", dockerfile: 
".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false } - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" } - - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" } + - { tag: "cuda cuda12", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "12.4.0", ubuntu_version: "22.04" } + - { tag: "cuda13", dockerfile: ".devops/cuda-new.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04", cuda_version: "13.1.0", ubuntu_version: "24.04" } - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" } - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true, runs_on: "ubuntu-22.04" } - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-22.04" } @@ -80,18 +81,21 @@ jobs: run: | REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case REPO_NAME="${{ github.event.repository.name }}" + PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:" # list all tags possible - if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then - TYPE="" - else - TYPE="-${{ matrix.config.tag }}" - fi - PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:" - CACHETAGS="${PREFIX}buildcache${TYPE}" - FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}" - LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ 
steps.srctag.outputs.name }}" - SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}" + tags="${{ matrix.config.tag }}" + for tag in $tags; do + if [[ "$tag" == "cpu" ]]; then + TYPE="" + else + TYPE="-$tag" + fi + CACHETAGS="${PREFIX}buildcache${TYPE}" + FULLTAGS="${FULLTAGS:+$FULLTAGS,}${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}" + LIGHTTAGS="${LIGHTTAGS:+$LIGHTTAGS,}${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}" + SERVERTAGS="${SERVERTAGS:+$SERVERTAGS,}${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}" + done echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT @@ -132,6 +136,9 @@ jobs: file: ${{ matrix.config.dockerfile }} target: full provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} # using github experimental cache #cache-from: type=gha #cache-to: type=gha,mode=max @@ -154,6 +161,9 @@ jobs: file: ${{ matrix.config.dockerfile }} target: light provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} # using github experimental cache #cache-from: type=gha #cache-to: type=gha,mode=max @@ -176,6 +186,9 @@ jobs: file: ${{ matrix.config.dockerfile }} target: server provenance: false + build-args: | + ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }} + ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }} # using github experimental cache #cache-from: type=gha #cache-to: 
type=gha,mode=max From c8a37980419e8b7f0193d058fb6f8f01b458cfca Mon Sep 17 00:00:00 2001 From: Rahul Sathe <150351592+rrsathe@users.noreply.github.com> Date: Wed, 31 Dec 2025 06:38:44 +0530 Subject: [PATCH 07/18] Work around broken IntelSYCLConfig.cmake in Intel oneAPI 2025.x (#18345) * cmake: work around broken IntelSYCLConfig.cmake in oneAPI 2025.x * [AI] sycl: auto-detect and skip incompatible IntelSYCL package Automatically detect compiler versions with incompatible IntelSYCL CMake configuration files and fall back to manual SYCL flags instead of requiring users to set options manually. Fixes build failures with oneAPI 2025.x where IntelSYCLConfig.cmake has SYCL_FEATURE_TEST_EXTRACT invocation errors. * refactor: improve SYCL provider handling and error messages in CMake configuration * refactor: enhance SYCL provider validation and error handling in CMake configuration * ggml-sycl: wrap find_package(IntelSYCL) to prevent build crashes --- ggml/src/ggml-sycl/CMakeLists.txt | 44 +++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-sycl/CMakeLists.txt b/ggml/src/ggml-sycl/CMakeLists.txt index 88f29221b..51594fb88 100644 --- a/ggml/src/ggml-sycl/CMakeLists.txt +++ b/ggml/src/ggml-sycl/CMakeLists.txt @@ -36,7 +36,47 @@ if (WIN32) endif() endif() -find_package(IntelSYCL) +macro(detect_and_find_package package_name) + set(test_source " + cmake_minimum_required(VERSION ${CMAKE_VERSION}) + project(check_package LANGUAGES CXX) + find_package(${package_name} QUIET) + ") + + set(test_dir "${CMAKE_CURRENT_BINARY_DIR}/check_package_${package_name}") + file(WRITE "${test_dir}/CMakeLists.txt" "${test_source}") + + set(cmake_args "") + if(CMAKE_GENERATOR) + list(APPEND cmake_args "-G" "${CMAKE_GENERATOR}") + endif() + if(CMAKE_GENERATOR_PLATFORM) + list(APPEND cmake_args "-A" "${CMAKE_GENERATOR_PLATFORM}") + endif() + if(CMAKE_GENERATOR_TOOLSET) + list(APPEND cmake_args "-T" "${CMAKE_GENERATOR_TOOLSET}") + endif() + 
if(CMAKE_CXX_COMPILER) + list(APPEND cmake_args "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}") + endif() + + execute_process( + COMMAND ${CMAKE_COMMAND} ${cmake_args} . + WORKING_DIRECTORY "${test_dir}" + RESULT_VARIABLE result + OUTPUT_QUIET + ERROR_QUIET + ) + + if(result EQUAL 0) + find_package(${package_name} ${ARGN}) + else() + message(WARNING "Detection of ${package_name} failed. The package might be broken or incompatible.") + set(${package_name}_FOUND FALSE) + endif() +endmacro() + +detect_and_find_package(IntelSYCL) if (IntelSYCL_FOUND) # Use oneAPI CMake when possible target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX) @@ -190,4 +230,4 @@ endif() if (GGML_SYCL_DEVICE_ARCH) target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}) target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}) -endif() +endif() \ No newline at end of file From 7bcaf815c20f471fead106088b558e542982bf30 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 31 Dec 2025 14:23:44 +0800 Subject: [PATCH 08/18] sycl: add newline at the end of CMakeLists.txt (#18503) --- ggml/src/ggml-sycl/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/CMakeLists.txt b/ggml/src/ggml-sycl/CMakeLists.txt index 51594fb88..5a89d8dd6 100644 --- a/ggml/src/ggml-sycl/CMakeLists.txt +++ b/ggml/src/ggml-sycl/CMakeLists.txt @@ -230,4 +230,5 @@ endif() if (GGML_SYCL_DEVICE_ARCH) target_compile_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}) target_link_options(ggml-sycl PRIVATE -Xsycl-target-backend --offload-arch=${GGML_SYCL_DEVICE_ARCH}) -endif() \ No newline at end of file +endif() + From 01ade96e71b62b482019e42dd74551758fde8851 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 31 Dec 2025 09:53:48 +0200 Subject: [PATCH 09/18] metal : remove BF16 x F16 kernels (#18456) --- ggml/src/ggml-metal/ggml-metal.metal | 6 
------ 1 file changed, 6 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 51bcbae30..3154beff9 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -9557,9 +9557,6 @@ template [[host_name("kernel_mul_mm_iq4_xs_f32")]] kernel mul_mm_t kernel_mul_m template [[host_name("kernel_mul_mm_f32_f16")]] kernel mul_mm_t kernel_mul_mm; template [[host_name("kernel_mul_mm_f16_f16")]] kernel mul_mm_t kernel_mul_mm; -#if defined(GGML_METAL_HAS_BF16) -template [[host_name("kernel_mul_mm_bf16_f16")]] kernel mul_mm_t kernel_mul_mm; -#endif template [[host_name("kernel_mul_mm_q4_0_f16")]] kernel mul_mm_t kernel_mul_mm; template [[host_name("kernel_mul_mm_q4_1_f16")]] kernel mul_mm_t kernel_mul_mm; template [[host_name("kernel_mul_mm_q5_0_f16")]] kernel mul_mm_t kernel_mul_mm; @@ -9615,9 +9612,6 @@ template [[host_name("kernel_mul_mm_id_iq4_xs_f32")]] kernel mul_mm_id kernel_m template [[host_name("kernel_mul_mm_id_f32_f16")]] kernel mul_mm_id kernel_mul_mm_id; template [[host_name("kernel_mul_mm_id_f16_f16")]] kernel mul_mm_id kernel_mul_mm_id; -#if defined(GGML_METAL_HAS_BF16) -template [[host_name("kernel_mul_mm_id_bf16_f16")]] kernel mul_mm_id kernel_mul_mm_id; -#endif template [[host_name("kernel_mul_mm_id_q4_0_f16")]] kernel mul_mm_id kernel_mul_mm_id; template [[host_name("kernel_mul_mm_id_q4_1_f16")]] kernel mul_mm_id kernel_mul_mm_id; template [[host_name("kernel_mul_mm_id_q5_0_f16")]] kernel mul_mm_id kernel_mul_mm_id; From ecc343de63ac1aaba9c74cd26807fd60aec31ab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Wed, 31 Dec 2025 09:37:00 +0100 Subject: [PATCH 10/18] CUDA: fix KQ max calculation (#18487) --- ggml/src/ggml-cuda/fattn-mma-f16.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/fattn-mma-f16.cuh b/ggml/src/ggml-cuda/fattn-mma-f16.cuh index 7bd1044c1..856291dc3 100644 --- 
a/ggml/src/ggml-cuda/fattn-mma-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-mma-f16.cuh @@ -531,7 +531,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter( for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::I) { #pragma unroll for (int l = 0; l < T_C_KQ::ne; ++l) { - if (!oob_check || k0 + T_C_KQ::get_i(l) < k_VKQ_sup) { + if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::I + T_C_KQ::get_i(l) < k_VKQ_sup) { KQ_max_new[l % 2] = fmaxf(KQ_max_new[l % 2], KQ_C[k0/(np*T_C_KQ::I)].x[l] + FATTN_KQ_MAX_OFFSET); } } @@ -583,7 +583,7 @@ static __device__ __forceinline__ void flash_attn_ext_f16_iter( for (int k0 = 0; k0 < nbatch_fa; k0 += np*T_C_KQ::J) { #pragma unroll for (int l = 0; l < T_C_KQ::ne; ++l) { - if (!oob_check || k0 + T_C_KQ::get_j(l) < k_VKQ_sup) { + if (!oob_check || k0 + (threadIdx.y % np)*T_C_KQ::J + T_C_KQ::get_j(l) < k_VKQ_sup) { // Turing + Volta: KQ_max_new[(l/2) % 2] = fmaxf(KQ_max_new[(l/2) % 2], KQ_C[(k0/(np*T_C_KQ::J))].x[l] + FATTN_KQ_MAX_OFFSET); } From 9a6369bb603457f277b597f0ccee1c19cd25c4b2 Mon Sep 17 00:00:00 2001 From: gatbontonpc Date: Wed, 31 Dec 2025 00:39:48 -0800 Subject: [PATCH 11/18] metal : add count_equal op (#18314) * add count equal for metal * remove trailing whitespace * updated doc ops table * changed shmem to i32 * added multi tg and templating * removed BLAS support from Metal docs * Apply suggestions from code review Co-authored-by: Georgi Gerganov * add memset to set dst to 0 * metal : cleanup --------- Co-authored-by: Georgi Gerganov --- docs/ops.md | 2 +- docs/ops/Metal.csv | 682 ++++++++++++---------- ggml/src/ggml-metal/ggml-metal-device.cpp | 57 ++ ggml/src/ggml-metal/ggml-metal-device.h | 2 + ggml/src/ggml-metal/ggml-metal-device.m | 5 + ggml/src/ggml-metal/ggml-metal-impl.h | 20 + ggml/src/ggml-metal/ggml-metal-ops.cpp | 67 ++- ggml/src/ggml-metal/ggml-metal-ops.h | 1 + ggml/src/ggml-metal/ggml-metal.metal | 73 +++ 9 files changed, 585 insertions(+), 324 deletions(-) diff --git a/docs/ops.md b/docs/ops.md 
index b395d2315..2b2770cb7 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -32,7 +32,7 @@ Legend: | CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | | COS | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | -| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | +| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ | | CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/docs/ops/Metal.csv b/docs/ops/Metal.csv index 5f7450e91..02fd75fdb 100644 --- a/docs/ops/Metal.csv +++ b/docs/ops/Metal.csv @@ -965,6 +965,7 @@ "Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal" "Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","Metal" "Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal" +"Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[2,2,1536,729],ne_kernel=[2,2,1536,4096],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","Metal" "Metal","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal" "Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal" 
"Metal","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","0","no","Metal" @@ -4964,8 +4965,9 @@ "Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","Metal" "Metal","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","yes","Metal" "Metal","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","yes","Metal" -"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","0","no","Metal" -"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","0","no","Metal" +"Metal","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","1","yes","Metal" +"Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","yes","Metal" +"Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","yes","Metal" "Metal","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","yes","Metal" "Metal","ARGMAX","type=f32,ne=[32,513,1,1]","support","1","yes","Metal" "Metal","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","yes","Metal" @@ -5715,15 +5717,15 @@ "Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal" "Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001,inplace=1","support","1","yes","Metal" "Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" 
-"Metal","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" -"Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[6,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[6,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[6,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[3,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","1","yes","Metal" @@ -5733,6 +5735,15 @@ "Metal","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Metal" "Metal","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal" 
+"Metal","SSM_CONV","type=f32,ne_a=[18,1024,1,1],ne_b=[9,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,1024,4,1],ne_b=[9,1024,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[18,1536,1,1],ne_b=[9,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,1536,4,1],ne_b=[9,1536,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[18,2048,1,1],ne_b=[9,2048,1,1]","support","1","yes","Metal" +"Metal","SSM_CONV","type=f32,ne_a=[9,2048,4,1],ne_b=[9,2048,1,1]","support","1","yes","Metal" "Metal","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal" "Metal","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal" "Metal","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Metal" @@ -8916,6 +8927,8 @@ "Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=0,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000,inplace=0","support","1","yes","Metal" "Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal" "Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal" +"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal" +"Metal","SOFT_MAX","type=f32,ne=[200001,2,3,1],mask=1,sinks=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000,inplace=0","support","1","yes","Metal" 
"Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal" "Metal","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","0","no","Metal" "Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,2,3],scale=1.000000,max_bias=0.000000","support","0","no","Metal" @@ -9542,311 +9555,311 @@ "Metal","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","Metal" "Metal","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","Metal" "Metal","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15","support","1","yes","Metal" 
-"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15","support","1","yes","Metal" -"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","1","yes","Metal" 
+"Metal","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","1","yes","Metal" +"Metal","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","1","yes","Metal" "Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","Metal" "Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","Metal" "Metal","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","Metal" @@ -9891,8 +9904,9 @@ "Metal","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","yes","Metal" "Metal","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","yes","Metal" "Metal","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","Metal" -"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","Metal" -"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1,circular=0","support","1","yes","Metal" +"Metal","PAD","type=f32,ne_a=[33,17,2,1],pad_0=4,pad_1=3,circular=1","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0,circular=0","support","0","no","Metal" "Metal","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","yes","Metal" "Metal","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","1","yes","Metal" 
"Metal","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","Metal" @@ -9923,17 +9937,41 @@ "Metal","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","Metal" "Metal","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","Metal" "Metal","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","1","yes","Metal" +"Metal","DIAG","type=f32,ne=[10,1,4,3]","support","0","no","Metal" +"Metal","DIAG","type=f32,ne=[79,1,19,13]","support","0","no","Metal" +"Metal","DIAG","type=f32,ne=[256,1,8,16]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[64,64,2,2]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[79,79,5,3],ne_rhs=[417,79,5,3]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,2],ne_rhs=[32,128,4,2]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[80,80,2,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[79,80,2,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,2,8],ne_rhs=[81,80,2,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[80,80,8,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[79,80,8,8]","support","0","no","Metal" 
+"Metal","SOLVE_TRI","type=f32,ne_lhs=[80,80,8,8],ne_rhs=[81,80,8,8]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[84,84,4,4],ne_rhs=[32,84,4,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[95,95,8,8],ne_rhs=[40,95,8,8]","support","0","no","Metal" "Metal","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","Metal" -"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","Metal" -"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","0","no","Metal" -"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","1","yes","Metal" -"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[32,128,4,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,3,4],ne_rhs=[32,128,3,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,1],ne_rhs=[32,128,4,1]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[200,64,4,4]","support","0","no","Metal" +"Metal","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[384,64,4,4]","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=0","support","1","yes","Metal" +"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=0","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=1","support","0","no","Metal" 
+"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=1","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=0","support","1","yes","Metal" +"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=0","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=1","support","0","no","Metal" +"Metal","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=1","support","0","no","Metal" "Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","1","yes","Metal" "Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","yes","Metal" "Metal","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","yes","Metal" diff --git a/ggml/src/ggml-metal/ggml-metal-device.cpp b/ggml/src/ggml-metal/ggml-metal-device.cpp index 680904d13..b0734797f 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.cpp +++ b/ggml/src/ggml-metal/ggml-metal-device.cpp @@ -1684,3 +1684,60 @@ ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd(ggm return res; } + +ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset(ggml_metal_library_t lib, const ggml_tensor * op) { + GGML_ASSERT(op->type == GGML_TYPE_I64); + + char base[256]; + char name[256]; + + snprintf(base, 256, "kernel_memset_%s", ggml_type_name(op->type)); + snprintf(name, 256, "%s", base); + + ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name); + 
if (!res.pipeline) { + res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr); + } + + return res; +} + +ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal(ggml_metal_library_t lib, const ggml_tensor * op) { + assert(op->op == GGML_OP_COUNT_EQUAL); + + GGML_TENSOR_LOCALS(int64_t, ne0, op->src[0], ne); + + GGML_ASSERT(op->src[0]->type == op->src[1]->type); + GGML_ASSERT(op->src[0]->type == GGML_TYPE_I32); + GGML_ASSERT(op->type == GGML_TYPE_I64); + + // note: the kernel only supports i32 output due to metal atomic add only supporting atomic_int + GGML_ASSERT(ggml_nelements(op->src[0]) < (1LL << 31)); + + char base[256]; + char name[256]; + + int nsg = 1; + while (32*nsg < ne00 && nsg < 32) { + nsg *= 2; + } + + snprintf(base, 256, "kernel_count_equal_%s", ggml_type_name(op->src[0]->type)); + snprintf(name, 256, "%s_nsg=%d", base, nsg); + + ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name); + if (!res.pipeline) { + ggml_metal_cv_t cv = ggml_metal_cv_init(); + + ggml_metal_cv_set_int16(cv, nsg, FC_COUNT_EQUAL + 0); + + res = ggml_metal_library_compile_pipeline(lib, base, name, cv); + + ggml_metal_cv_free(cv); + } + + res.smem = 32 * sizeof(int32_t); + res.nsg = nsg; + + return res; +} diff --git a/ggml/src/ggml-metal/ggml-metal-device.h b/ggml/src/ggml-metal/ggml-metal-device.h index 0a8b9211a..d983b666c 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.h +++ b/ggml/src/ggml-metal/ggml-metal-device.h @@ -147,6 +147,8 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_arange struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_timestep_embedding(ggml_metal_library_t lib, const struct ggml_tensor * op); struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_adamw (ggml_metal_library_t lib, const struct ggml_tensor * op); struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_opt_step_sgd (ggml_metal_library_t lib, const 
struct ggml_tensor * op); +struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_memset (ggml_metal_library_t lib, const struct ggml_tensor * op); +struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_count_equal (ggml_metal_library_t lib, const struct ggml_tensor * op); struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_flash_attn_ext_pad( ggml_metal_library_t lib, diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index f24270bb1..59badd004 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -1023,6 +1023,11 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te return has_simdgroup_reduction && ggml_is_contiguous_rows(op->src[0]); case GGML_OP_L2_NORM: return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && ggml_is_contiguous_1(op->src[0])); + case GGML_OP_COUNT_EQUAL: + return has_simdgroup_reduction && + op->src[0]->type == GGML_TYPE_I32 && + op->src[1]->type == GGML_TYPE_I32 && + op->type == GGML_TYPE_I64; case GGML_OP_ARGMAX: return has_simdgroup_reduction; case GGML_OP_NORM: diff --git a/ggml/src/ggml-metal/ggml-metal-impl.h b/ggml/src/ggml-metal/ggml-metal-impl.h index 8944b07e9..d3b0e732e 100644 --- a/ggml/src/ggml-metal/ggml-metal-impl.h +++ b/ggml/src/ggml-metal/ggml-metal-impl.h @@ -78,6 +78,7 @@ #define FC_MUL_MM 700 #define FC_ROPE 800 #define FC_SSM_CONV 900 +#define FC_COUNT_EQUAL 1000 // op-specific constants #define OP_FLASH_ATTN_EXT_NQPTG 8 @@ -894,6 +895,25 @@ typedef struct { float step; } ggml_metal_kargs_arange; +typedef struct { + int64_t val; +} ggml_metal_kargs_memset; + +typedef struct { + int32_t ne00; + int32_t ne01; + int32_t ne02; + int32_t ne03; + uint64_t nb00; + uint64_t nb01; + uint64_t nb02; + uint64_t nb03; + uint64_t nb10; + uint64_t nb11; + uint64_t nb12; + uint64_t nb13; +} ggml_metal_kargs_count_equal; + typedef struct { int32_t k0; int32_t k1; 
diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp index e99c1763f..acf2aa918 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.cpp +++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp @@ -448,7 +448,11 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) { { n_fuse = ggml_metal_op_opt_step_sgd(ctx, idx); } break; - default: + case GGML_OP_COUNT_EQUAL: + { + n_fuse = ggml_metal_op_count_equal(ctx, idx); + } break; + default: { GGML_LOG_ERROR("%s: error: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(node->op)); GGML_ABORT("fatal error"); @@ -4090,3 +4094,64 @@ int ggml_metal_op_opt_step_sgd(ggml_metal_op_t ctx, int idx) { return 1; } + +int ggml_metal_op_count_equal(ggml_metal_op_t ctx, int idx) { + ggml_tensor * op = ctx->node(idx); + + ggml_metal_library_t lib = ctx->lib; + ggml_metal_encoder_t enc = ctx->enc; + + GGML_TENSOR_LOCALS(int32_t, ne0, op->src[0], ne); + GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb); + GGML_TENSOR_LOCALS(uint64_t, nb1, op->src[1], nb); + + { + ggml_metal_kargs_memset args = { /*.val =*/ 0 }; + + auto pipeline = ggml_metal_library_get_pipeline_memset(lib, op); + + ggml_metal_encoder_set_pipeline(enc, pipeline); + ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0); + ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 1); + + ggml_metal_encoder_dispatch_threadgroups(enc, 1, 1, 1, 1, 1, 1); + } + + ggml_metal_op_concurrency_reset(ctx); + + { + ggml_metal_kargs_count_equal args = { + /*.ne00 =*/ ne00, + /*.ne01 =*/ ne01, + /*.ne02 =*/ ne02, + /*.ne03 =*/ ne03, + /*.nb00 =*/ nb00, + /*.nb01 =*/ nb01, + /*.nb02 =*/ nb02, + /*.nb03 =*/ nb03, + /*.nb10 =*/ nb10, + /*.nb11 =*/ nb11, + /*.nb12 =*/ nb12, + /*.nb13 =*/ nb13, + }; + + auto pipeline = ggml_metal_library_get_pipeline_count_equal(lib, op); + + const size_t smem = pipeline.smem; + + const int nth = 32*pipeline.nsg; + + GGML_ASSERT(nth <= 
ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)); + + ggml_metal_encoder_set_pipeline(enc, pipeline); + ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0); + ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[0]), 1); + ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[1]), 2); + ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 3); + + ggml_metal_encoder_set_threadgroup_memory_size(enc, smem, 0); + ggml_metal_encoder_dispatch_threadgroups(enc, ne01, ne02, ne03, nth, 1, 1); + } + + return 1; +} diff --git a/ggml/src/ggml-metal/ggml-metal-ops.h b/ggml/src/ggml-metal/ggml-metal-ops.h index 902b54452..c1025d356 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.h +++ b/ggml/src/ggml-metal/ggml-metal-ops.h @@ -87,6 +87,7 @@ int ggml_metal_op_leaky_relu (ggml_metal_op_t ctx, int idx); int ggml_metal_op_tri (ggml_metal_op_t ctx, int idx); int ggml_metal_op_opt_step_adamw (ggml_metal_op_t ctx, int idx); int ggml_metal_op_opt_step_sgd (ggml_metal_op_t ctx, int idx); +int ggml_metal_op_count_equal (ggml_metal_op_t ctx, int idx); #ifdef __cplusplus } diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 3154beff9..67b30e0d9 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -1790,6 +1790,7 @@ kernel void kernel_op_sum_f32( return; } + // TODO: become function constant const uint nsg = (ntg.x + 31) / 32; float sumf = 0; @@ -9914,3 +9915,75 @@ kernel void kernel_opt_step_sgd_f32( x[gid] = x[gid] * (1.0f - pars[0] * pars[1]) - pars[0] * g[gid]; } + +template +kernel void kernel_memset( + constant ggml_metal_kargs_fill & args, + device T * dst, + uint tpig[[thread_position_in_grid]]) { + dst[tpig] = args.val; +} + +typedef decltype(kernel_memset) kernel_memset_t; + +template [[host_name("kernel_memset_i64")]] kernel kernel_memset_t kernel_memset; + +constant short FC_count_equal_nsg [[function_constant(FC_COUNT_EQUAL + 0)]]; + 
+template +kernel void kernel_count_equal( + constant ggml_metal_kargs_count_equal & args, + device const char * src0, + device const char * src1, + device atomic_int * dst, + threadgroup int32_t * shmem_i32 [[threadgroup(0)]], + uint3 tgpig[[threadgroup_position_in_grid]], + ushort3 tpitg[[thread_position_in_threadgroup]], + ushort sgitg[[simdgroup_index_in_threadgroup]], + ushort tiisg[[thread_index_in_simdgroup]], + ushort3 ntg[[threads_per_threadgroup]]) { + const short NSG = FC_count_equal_nsg; + + const int i3 = tgpig.z; + const int i2 = tgpig.y; + const int i1 = tgpig.x; + + if (i3 >= args.ne03 || i2 >= args.ne02 || i1 >= args.ne01) { + return; + } + + int sum = 0; + + device const char * base0 = src0 + i1*args.nb01 + i2*args.nb02 + i3*args.nb03; + device const char * base1 = src1 + i1*args.nb11 + i2*args.nb12 + i3*args.nb13; + + for (int64_t i0 = tpitg.x; i0 < args.ne00; i0 += ntg.x) { + const T v0 = *(device const T *)(base0 + i0*args.nb00); + const T v1 = *(device const T *)(base1 + i0*args.nb10); + sum += (v0 == v1); + } + + sum = simd_sum(sum); + + if (tiisg == 0) { + shmem_i32[sgitg] = sum; + } + + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (sgitg == 0) { + float v = 0.0f; + if (tpitg.x < NSG) { + v = shmem_i32[tpitg.x]; + } + + float total = simd_sum(v); + if (tpitg.x == 0) { + atomic_fetch_add_explicit(dst, (int32_t) total, memory_order_relaxed); + } + } +} + +typedef decltype(kernel_count_equal) kernel_count_equal_t; + +template [[host_name("kernel_count_equal_i32")]] kernel kernel_count_equal_t kernel_count_equal; From 9b8329de7a7200385aaac16ab4a2ab79ae14d829 Mon Sep 17 00:00:00 2001 From: Henry147147 <44851451+Henry147147@users.noreply.github.com> Date: Wed, 31 Dec 2025 06:13:23 -0500 Subject: [PATCH 12/18] mtmd : Adding support for Nvidia Music Flamingo Model (#18470) * Inital commit, debugging q5_k_s quant * Made hf_to_gguf extend whisper to reduce code duplication * addressed convert_hf_to_gguf pull request issue --------- 
Co-authored-by: Henry D --- convert_hf_to_gguf.py | 14 +++++++++++++- gguf-py/gguf/constants.py | 1 + tools/mtmd/clip-impl.h | 2 ++ tools/mtmd/clip-model.h | 3 ++- tools/mtmd/clip.cpp | 19 ++++++++++++++++++- tools/mtmd/models/whisper-enc.cpp | 9 +++++++++ tools/mtmd/mtmd.cpp | 4 ++++ 7 files changed, 49 insertions(+), 3 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index f893b24c7..173f8ed0d 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -3503,7 +3503,7 @@ class QwenModel(TextModel): self._set_vocab_qwen() -@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration", "KORMoForCausalLM") +@ModelBase.register("Qwen2Model", "Qwen2ForCausalLM", "Qwen2AudioForConditionalGeneration", "KORMoForCausalLM", "AudioFlamingo3ForConditionalGeneration") class Qwen2Model(TextModel): model_arch = gguf.MODEL_ARCH.QWEN2 @@ -9292,6 +9292,18 @@ class VoxtralWhisperEncoderModel(WhisperEncoderModel): self.gguf_writer.add_audio_stack_factor(4) # == intermediate_size // hidden_size +@ModelBase.register("AudioFlamingo3ForConditionalGeneration") +class AudioFlamingo3WhisperEncoderModel(WhisperEncoderModel): + def set_gguf_parameters(self): + super().set_gguf_parameters() + self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.MUSIC_FLAMINGO) + + def tensor_force_quant(self, name, new_name, bid, n_dims): + if ".conv" in name and ".weight" in name: + # Was trained in BF16, being safe, avoiding quantizing to FP16 + return gguf.GGMLQuantizationType.F32 + return super().tensor_force_quant(name, new_name, bid, n_dims) + @ModelBase.register("FalconH1ForCausalLM") class FalconH1Model(Mamba2Model): model_arch = gguf.MODEL_ARCH.FALCON_H1 diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 616b8add3..c2a0f41c1 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -3492,6 +3492,7 @@ class VisionProjectorType: COGVLM = "cogvlm" JANUS_PRO = "janus_pro" LFM2A = "lfm2a" # audio + 
MUSIC_FLAMINGO = "musicflamingo" # audio GLM4V = "glm4v" diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h index a0939865e..1ed074188 100644 --- a/tools/mtmd/clip-impl.h +++ b/tools/mtmd/clip-impl.h @@ -180,6 +180,7 @@ enum projector_type { PROJECTOR_TYPE_GLMA, PROJECTOR_TYPE_QWEN25O, // will be replaced by QWEN2A or QWEN25VL depending on clip_ctx PROJECTOR_TYPE_VOXTRAL, + PROJECTOR_TYPE_MUSIC_FLAMINGO, PROJECTOR_TYPE_LFM2, PROJECTOR_TYPE_KIMIVL, PROJECTOR_TYPE_LIGHTONOCR, @@ -209,6 +210,7 @@ static std::map PROJECTOR_TYPE_NAMES = { { PROJECTOR_TYPE_GLMA, "glma"}, { PROJECTOR_TYPE_QWEN25O, "qwen2.5o"}, { PROJECTOR_TYPE_VOXTRAL, "voxtral"}, + { PROJECTOR_TYPE_MUSIC_FLAMINGO, "musicflamingo"}, { PROJECTOR_TYPE_LFM2, "lfm2"}, { PROJECTOR_TYPE_KIMIVL, "kimivl"}, { PROJECTOR_TYPE_LIGHTONOCR,"lightonocr"}, diff --git a/tools/mtmd/clip-model.h b/tools/mtmd/clip-model.h index b4c31cdde..1e5aa87b9 100644 --- a/tools/mtmd/clip-model.h +++ b/tools/mtmd/clip-model.h @@ -319,7 +319,8 @@ struct clip_model { bool audio_has_avgpool() const { return proj_type == PROJECTOR_TYPE_QWEN2A - || proj_type == PROJECTOR_TYPE_VOXTRAL; + || proj_type == PROJECTOR_TYPE_VOXTRAL + || proj_type == PROJECTOR_TYPE_MUSIC_FLAMINGO; } bool audio_has_stack_frames() const { diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 3ba0823de..fb08dd258 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -818,6 +818,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 case PROJECTOR_TYPE_VOXTRAL: case PROJECTOR_TYPE_QWEN2A: case PROJECTOR_TYPE_GLMA: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: { builder = std::make_unique(ctx, img); } break; @@ -1176,6 +1177,7 @@ struct clip_model_loader { case PROJECTOR_TYPE_QWEN2A: case PROJECTOR_TYPE_GLMA: case PROJECTOR_TYPE_VOXTRAL: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: { bool require_stack = model.proj_type == PROJECTOR_TYPE_ULTRAVOX || model.proj_type == PROJECTOR_TYPE_VOXTRAL || @@ -1576,6 +1578,17 @@ struct 
clip_model_loader { model.mm_1_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "weight")); model.mm_2_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "weight")); } break; + case PROJECTOR_TYPE_MUSIC_FLAMINGO: + { + model.conv1d_1_w = get_tensor(string_format(TN_CONV1D, 1, "weight")); + model.conv1d_1_b = get_tensor(string_format(TN_CONV1D, 1, "bias")); + model.conv1d_2_w = get_tensor(string_format(TN_CONV1D, 2, "weight")); + model.conv1d_2_b = get_tensor(string_format(TN_CONV1D, 2, "bias")); + model.mm_1_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "weight")); + model.mm_1_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 1, "bias")); + model.mm_2_w = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "weight")); + model.mm_2_b = get_tensor(string_format(TN_MM_AUDIO_MLP, 2, "bias")); + } break; case PROJECTOR_TYPE_INTERNVL: { model.mm_0_w = get_tensor(string_format(TN_MVLM_PROJ_MLP, 0, "weight")); @@ -3031,6 +3044,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im case PROJECTOR_TYPE_VOXTRAL: case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_QWEN2A: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: { n_patches = img->nx; @@ -3403,6 +3417,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_LFM2: case PROJECTOR_TYPE_VOXTRAL: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: case PROJECTOR_TYPE_JANUS_PRO: case PROJECTOR_TYPE_COGVLM: { @@ -3526,6 +3541,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { return ctx->model.projection->ne[1]; case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_VOXTRAL: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: return ctx->model.mm_2_w->ne[1]; case PROJECTOR_TYPE_INTERNVL: return ctx->model.mm_3_w->ne[1]; @@ -3587,7 +3603,8 @@ bool clip_has_whisper_encoder(const struct clip_ctx * ctx) { return ctx->proj_type() == PROJECTOR_TYPE_ULTRAVOX || ctx->proj_type() == PROJECTOR_TYPE_QWEN2A || ctx->proj_type() == PROJECTOR_TYPE_GLMA - || ctx->proj_type() == 
PROJECTOR_TYPE_VOXTRAL; + || ctx->proj_type() == PROJECTOR_TYPE_VOXTRAL + || ctx->proj_type() == PROJECTOR_TYPE_MUSIC_FLAMINGO; } bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec) { diff --git a/tools/mtmd/models/whisper-enc.cpp b/tools/mtmd/models/whisper-enc.cpp index 2870d854a..2f2b12775 100644 --- a/tools/mtmd/models/whisper-enc.cpp +++ b/tools/mtmd/models/whisper-enc.cpp @@ -86,6 +86,15 @@ ggml_cgraph * clip_graph_whisper_enc::build() { FFN_GELU_ERF, -1); + } else if (proj_type == PROJECTOR_TYPE_MUSIC_FLAMINGO) { + // projector + cur = build_ffn(cur, + model.mm_1_w, model.mm_1_b, + nullptr, nullptr, + model.mm_2_w, model.mm_2_b, + FFN_GELU_ERF, + -1); + } else if (proj_type == PROJECTOR_TYPE_GLMA) { cur = ggml_norm(ctx0, cur, hparams.eps); cur = ggml_mul(ctx0, cur, model.mm_norm_pre_w); diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp index b9c4fa909..b0b5ab42a 100644 --- a/tools/mtmd/mtmd.cpp +++ b/tools/mtmd/mtmd.cpp @@ -330,6 +330,7 @@ struct mtmd_context { case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_VOXTRAL: case PROJECTOR_TYPE_GLMA: + case PROJECTOR_TYPE_MUSIC_FLAMINGO: audio_preproc = std::make_unique(ctx_a); break; case PROJECTOR_TYPE_LFM2A: @@ -352,6 +353,9 @@ struct mtmd_context { // [BEGIN_AUDIO] ... (embeddings) ... aud_beg = "[BEGIN_AUDIO]"; + } else if (proj == PROJECTOR_TYPE_MUSIC_FLAMINGO) { + // ... (embeddings) ... 
+ aud_beg = ""; } } From 0db81098494023775a704a44042c317d36c91f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Wed, 31 Dec 2025 14:28:21 +0100 Subject: [PATCH 13/18] convert : lint fix (#18507) --- convert_hf_to_gguf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 173f8ed0d..edc0ed539 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -9304,6 +9304,7 @@ class AudioFlamingo3WhisperEncoderModel(WhisperEncoderModel): return gguf.GGMLQuantizationType.F32 return super().tensor_force_quant(name, new_name, bid, n_dims) + @ModelBase.register("FalconH1ForCausalLM") class FalconH1Model(Mamba2Model): model_arch = gguf.MODEL_ARCH.FALCON_H1 From 33ded988ba9a5514036d64334f803334047a15d8 Mon Sep 17 00:00:00 2001 From: Anri Lombard Date: Wed, 31 Dec 2025 17:29:03 +0200 Subject: [PATCH 14/18] quantize: prevent input/output file collision (#18451) Check if input and output files are the same before quantizing to prevent file corruption when mmap reads from a file being written to. 
Fixes #12753 --- tools/quantize/quantize.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp index 470dc3d91..881f4b3dd 100644 --- a/tools/quantize/quantize.cpp +++ b/tools/quantize/quantize.cpp @@ -12,6 +12,7 @@ #include #include #include +#include struct quant_option { std::string name; @@ -643,6 +644,11 @@ int main(int argc, char ** argv) { return 1; } + if (std::error_code ec; std::filesystem::equivalent(fname_inp, fname_out, ec)) { + fprintf(stderr, "%s: error: input and output files are the same: '%s'\n", __func__, fname_inp.c_str()); + return 1; + } + print_build_info(); fprintf(stderr, "%s: quantizing '%s' to '%s' as %s", __func__, fname_inp.c_str(), fname_out.c_str(), ftype_str.c_str()); From 54f67b9b66341a7d2a362bdb67211090c791ef44 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 31 Dec 2025 18:24:07 +0200 Subject: [PATCH 15/18] ggml : bump version to 0.9.5 (ggml/1410) --- ggml/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index cb46c3210..0176ca1ce 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -4,7 +4,7 @@ project("ggml" C CXX ASM) ### GGML Version set(GGML_VERSION_MAJOR 0) set(GGML_VERSION_MINOR 9) -set(GGML_VERSION_PATCH 4) +set(GGML_VERSION_PATCH 5) set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}") find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH) From 13814eb370d2f0b70e1830cc577b6155b17aee47 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 31 Dec 2025 18:27:54 +0200 Subject: [PATCH 16/18] sync : ggml --- scripts/sync-ggml.last | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index 5823efac2..c83827615 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -130bc125a88bb57664b88932c48c38a1cb316fac +ebc3a0f4a56be1c9424a89fbec09962ac34fde85 
From 4cd162a1235682d78e0ad04ca5f27bcaeef2460e Mon Sep 17 00:00:00 2001 From: Anri Lombard Date: Thu, 1 Jan 2026 01:21:37 +0200 Subject: [PATCH 17/18] chat: make tool description and parameters optional per OpenAI spec (#18478) * chat: make tool description and parameters optional per OpenAI spec Per the OpenAI API specification, both 'description' and 'parameters' fields in tool function definitions are optional. Previously, the parser would throw an exception if these fields were missing. Attempts to fix #17667 * refactor: use value() for cleaner optional field access --- common/chat.cpp | 4 ++-- tests/test-chat.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index be44c8abb..7e940695b 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -380,8 +380,8 @@ std::vector common_chat_tools_parse_oaicompat(const json & too const auto & function = tool.at("function"); result.push_back({ /* .name = */ function.at("name"), - /* .description = */ function.at("description"), - /* .parameters = */ function.at("parameters").dump(), + /* .description = */ function.value("description", ""), + /* .parameters = */ function.value("parameters", json::object()).dump(), }); } } diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index a78627604..a07c81fba 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -724,6 +724,30 @@ static void test_tools_oaicompat_json_conversion() { "]" ), common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2)); + + { + auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse( + R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])")); + assert_equals((size_t) 1, tools_no_params.size()); + assert_equals(std::string("test_func"), tools_no_params[0].name); + assert_equals(std::string("A test"), tools_no_params[0].description); + assert_equals(std::string("{}"), tools_no_params[0].parameters); + } + { + auto 
tools_no_desc = common_chat_tools_parse_oaicompat(json::parse( + R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])")); + assert_equals((size_t) 1, tools_no_desc.size()); + assert_equals(std::string("test_func"), tools_no_desc[0].name); + assert_equals(std::string(""), tools_no_desc[0].description); + } + { + auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse( + R"([{"type": "function", "function": {"name": "test_func"}}])")); + assert_equals((size_t) 1, tools_minimal.size()); + assert_equals(std::string("test_func"), tools_minimal[0].name); + assert_equals(std::string(""), tools_minimal[0].description); + assert_equals(std::string("{}"), tools_minimal[0].parameters); + } } static void test_template_output_parsers() { From 9e10bd2eafa337380533b9c066a81f9916240e1c Mon Sep 17 00:00:00 2001 From: triplenom <79777178+triplenom@users.noreply.github.com> Date: Wed, 31 Dec 2025 21:24:43 -0500 Subject: [PATCH 18/18] llama: handle short reads in direct I/O path (#18504) --- src/llama-mmap.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp index 23b648a2e..232005e14 100644 --- a/src/llama-mmap.cpp +++ b/src/llama-mmap.cpp @@ -240,9 +240,10 @@ struct llama_file::impl { throw std::runtime_error("unexpectedly reached end of file"); } } else { - bool successful = false; - while (!successful) { - off_t ret = read(fd, ptr, len); + size_t bytes_read = 0; + while (bytes_read < len) { + const size_t to_read = len - bytes_read; + ssize_t ret = ::read(fd, reinterpret_cast(ptr) + bytes_read, to_read); if (ret == -1) { if (errno == EINTR) { @@ -251,10 +252,16 @@ struct llama_file::impl { throw std::runtime_error(format("read error: %s", strerror(errno))); } if (ret == 0) { + // EOF: allow if this read was only pulling alignment padding past file end + off_t pos = lseek(fd, 0, SEEK_CUR); + if (pos != -1 && (size_t) pos == size) { + 
std::memset(reinterpret_cast(ptr) + bytes_read, 0, len - bytes_read); + return; + } throw std::runtime_error("unexpectedly reached end of file"); } - successful = true; + bytes_read += (size_t) ret; } } }