mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 11:40:43 +00:00
model: support GLM4V vision encoder (#18042)
* convert ok * no deepstack * less new tensors * cgraph ok * add mrope for text model * faster patch merger * add GGML_ROPE_TYPE_MRNORM * add support for metal * move glm4v do dedicated graph * convert: add norm_embd * clip: add debugging fn * working correctly * fix style * use bicubic * fix mrope metal * improve cpu * convert to neox ordering on conversion * revert backend changes * force stop if using old weight * support moe variant * fix conversion * fix convert (2) * Update tools/mtmd/clip-graph.h Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * process mrope_section on TextModel base class * resolve conflict merge --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
9963b81f63
commit
3d86c6c2b5
17 changed files with 412 additions and 79 deletions
|
|
@ -217,7 +217,7 @@ struct mtmd_context {
|
|||
|
||||
void init_vision() {
|
||||
GGML_ASSERT(ctx_v != nullptr);
|
||||
use_mrope = clip_is_qwen2vl(ctx_v);
|
||||
use_mrope = clip_is_mrope(ctx_v);
|
||||
|
||||
projector_type proj = clip_get_projector_type(ctx_v);
|
||||
int minicpmv_version = clip_is_minicpmv(ctx_v);
|
||||
|
|
@ -309,6 +309,10 @@ struct mtmd_context {
|
|||
img_beg = "<|image_start|>";
|
||||
img_end = "<|image_end|>";
|
||||
|
||||
} else if (proj == PROJECTOR_TYPE_GLM4V) {
|
||||
img_beg = "<|begin_of_image|>";
|
||||
img_end = "<|end_of_image|>";
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue