model : qwen3vl reranker text support (#20332)

* model : fix qwen3vl reranker support

* Remove CLS_OUT

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
Vinicios Lugli 2026-03-10 19:40:14 -03:00 committed by GitHub
parent 10e5b148b0
commit 4d99d45084
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 23 additions and 6 deletions

View file

@@ -250,7 +250,7 @@ void llm_graph_input_cls::set_input(const llama_ubatch * ubatch) {
const bool last = (
cparams.pooling_type == LLAMA_POOLING_TYPE_LAST ||
- (cparams.pooling_type == LLAMA_POOLING_TYPE_RANK && arch == LLM_ARCH_QWEN3) // qwen3 reranking & embedding models use last token
+ (cparams.pooling_type == LLAMA_POOLING_TYPE_RANK && (arch == LLM_ARCH_QWEN3 || arch == LLM_ARCH_QWEN3VL)) // qwen3 reranking & embedding models use last token
);
for (int i = 0; i < n_tokens; ++i) {
@@ -2552,7 +2552,7 @@ void llm_graph_context::build_pooling(
}
// softmax for qwen3 reranker
- if (arch == LLM_ARCH_QWEN3) {
+ if (arch == LLM_ARCH_QWEN3 || arch == LLM_ARCH_QWEN3VL) {
cur = ggml_soft_max(ctx0, cur);
}
} break;