mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
readme : model : mtmd : lfm2 improvements (#15476)
* Support untied embeddings
* Increase number of image tokens to 1024
* Add LFM2-VL to readme
* Actually use untied embeddings
This commit is contained in:
parent
a0f98dd604
commit
e288693669
5 changed files with 11 additions and 4 deletions
|
@ -151,6 +151,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
|
||||||
- [x] [Bunny](https://github.com/BAAI-DCAI/Bunny)
|
- [x] [Bunny](https://github.com/BAAI-DCAI/Bunny)
|
||||||
- [x] [GLM-EDGE](https://huggingface.co/models?search=glm-edge)
|
- [x] [GLM-EDGE](https://huggingface.co/models?search=glm-edge)
|
||||||
- [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d)
|
- [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d)
|
||||||
|
- [x] [LFM2-VL](https://huggingface.co/collections/LiquidAI/lfm2-vl-68963bbc84a610f7638d5ffa)
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
|
|
@ -2590,6 +2590,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
MODEL_TENSOR.ATTN_K,
|
MODEL_TENSOR.ATTN_K,
|
||||||
MODEL_TENSOR.ATTN_V,
|
MODEL_TENSOR.ATTN_V,
|
||||||
MODEL_TENSOR.ATTN_OUT,
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
],
|
],
|
||||||
MODEL_ARCH.SMALLTHINKER: [
|
MODEL_ARCH.SMALLTHINKER: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
|
|
@ -2010,6 +2010,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
||||||
{ LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
|
{ LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
|
||||||
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||||
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
||||||
|
{ LLM_TENSOR_OUTPUT, "output" },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -5474,8 +5474,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_LFM2:
|
case LLM_ARCH_LFM2:
|
||||||
{
|
{
|
||||||
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
||||||
tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
|
tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
|
||||||
|
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
|
||||||
|
|
||||||
|
if (output == NULL) {
|
||||||
|
output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < n_layer; ++i) {
|
for (int i = 0; i < n_layer; ++i) {
|
||||||
auto & layer = layers[i];
|
auto & layer = layers[i];
|
||||||
|
@ -17787,8 +17792,7 @@ struct llm_build_lfm2 : public llm_graph_context {
|
||||||
cb(cur, "model.embedding_norm", -1);
|
cb(cur, "model.embedding_norm", -1);
|
||||||
res->t_embd = cur;
|
res->t_embd = cur;
|
||||||
|
|
||||||
// lm_head is tied with embeddings
|
cur = build_lora_mm(model.output, cur);
|
||||||
cur = build_lora_mm(model.tok_embd, cur);
|
|
||||||
cb(cur, "lm_head", -1);
|
cb(cur, "lm_head", -1);
|
||||||
|
|
||||||
res->t_logits = cur;
|
res->t_logits = cur;
|
||||||
|
|
|
@ -3513,7 +3513,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
|
||||||
const int height = img->ny;
|
const int height = img->ny;
|
||||||
const int total_factor = params.patch_size * params.proj_scale_factor;
|
const int total_factor = params.patch_size * params.proj_scale_factor;
|
||||||
constexpr int min_image_tokens = 64;
|
constexpr int min_image_tokens = 64;
|
||||||
constexpr int max_image_tokens = 256;
|
constexpr int max_image_tokens = 1024;
|
||||||
const float min_pixels = min_image_tokens * total_factor * total_factor;
|
const float min_pixels = min_image_tokens * total_factor * total_factor;
|
||||||
const float max_pixels = max_image_tokens * total_factor * total_factor;
|
const float max_pixels = max_image_tokens * total_factor * total_factor;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue