diff --git a/README.md b/README.md
index 844675638..a01ef6d50 100644
--- a/README.md
+++ b/README.md
@@ -151,6 +151,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [Bunny](https://github.com/BAAI-DCAI/Bunny)
 - [x] [GLM-EDGE](https://huggingface.co/models?search=glm-edge)
 - [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d)
+- [x] [LFM2-VL](https://huggingface.co/collections/LiquidAI/lfm2-vl-68963bbc84a610f7638d5ffa)
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 41804f3a2..61ebe6e5e 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -2590,6 +2590,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ATTN_K,
         MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.OUTPUT,
     ],
     MODEL_ARCH.SMALLTHINKER: [
         MODEL_TENSOR.TOKEN_EMBD,
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index 18dcc6ddf..c759a9c6d 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -2010,6 +2010,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
         }
     },
     {
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index c4f0b12f2..3c8440a8f 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -5474,8 +5474,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_LFM2:
                 {
-                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
                     tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
+                    output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }
 
                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];
@@ -17787,8 +17792,7 @@ struct llm_build_lfm2 : public llm_graph_context {
         cb(cur, "model.embedding_norm", -1);
         res->t_embd = cur;
 
-        // lm_head is tied with embeddings
-        cur = build_lora_mm(model.tok_embd, cur);
+        cur = build_lora_mm(model.output, cur);
         cb(cur, "lm_head", -1);
 
         res->t_logits = cur;
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index 1676c3283..b3628db64 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -3513,7 +3513,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
     const int height = img->ny;
     const int total_factor = params.patch_size * params.proj_scale_factor;
     constexpr int min_image_tokens = 64;
-    constexpr int max_image_tokens = 256;
+    constexpr int max_image_tokens = 1024;
     const float min_pixels = min_image_tokens * total_factor * total_factor;
     const float max_pixels = max_image_tokens * total_factor * total_factor;