mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-19 08:00:25 +00:00
convert : add image break token fallback (#22914)
* convert : add image break token fallback
This commit adds an image_break_token_id fallback for Mistral models where
the config contains an image_break_token_id of -1:
```console
"vision_encoder": {
"image_token_id": 10,
"image_break_token_id": -1,
...
```
But the tokenizer.json has this token:
```console
115 "id": 12,
116 "content": "[IMG_BREAK]",
117 "single_word": false,
118 "lstrip": false,
119 "rstrip": false,
120 "normalized": false,
121 "special": true
122 },
```
If we look in convert_hf_to_gguf.py we have:
```python
elif self.is_mistral_format:
# hparams is already vision config here so norm_eps is only defined in global_config.
self.hparams["norm_eps"] = self.global_config.get("norm_eps", None)
assert self.hparams["norm_eps"] is not None, "norm_eps not found in params.json"
if self.use_break_tok:
self.img_break_tok_id = self.find_vparam(["image_break_token_id"])
```
The motivation for this is that converting this model currently
results in the following error:
```console
load_hparams: model size: 5131.60 MiB
load_hparams: metadata size: 0.15 MiB
clip_init: failed to load model 'models/mmproj-Mistral-Medium-3.5-128B.gguf': operator(): unable to find tensor v.token_embd.img_break
mtmd_init_from_file: error: Failed to load CLIP model from models/mmproj-Mistral-Medium-3.5-128B.gguf
Failed to load vision model from models/mmproj-Mistral-Medium-3.5-128B.gguf
```
With this fallback the model loads successfully.
Resolves: https://github.com/ggml-org/llama.cpp/issues/22901
* Revert "convert : add image break token fallback"
This reverts commit 292e40cfdf9a7553863007c018236f5f554f71d8.
* convert : add image break token fallback
This commit adds an image_break_token_id fallback for Mistral models where
the config contains an image_break_token_id of -1:
```console
"vision_encoder": {
"image_token_id": 10,
"image_break_token_id": -1,
...
```
But the tokenizer.json has this token:
```console
115 "id": 12,
116 "content": "[IMG_BREAK]",
117 "single_word": false,
118 "lstrip": false,
119 "rstrip": false,
120 "normalized": false,
121 "special": true
122 },
```
If we look in convert_hf_to_gguf.py we have:
```python
elif self.is_mistral_format:
# hparams is already vision config here so norm_eps is only defined in global_config.
self.hparams["norm_eps"] = self.global_config.get("norm_eps", None)
assert self.hparams["norm_eps"] is not None, "norm_eps not found in params.json"
if self.use_break_tok:
self.img_break_tok_id = self.find_vparam(["image_break_token_id"])
```
The motivation for this is that converting this model currently
results in the following error:
```console
load_hparams: model size: 5131.60 MiB
load_hparams: metadata size: 0.15 MiB
clip_init: failed to load model 'models/mmproj-Mistral-Medium-3.5-128B.gguf': operator(): unable to find tensor v.token_embd.img_break
mtmd_init_from_file: error: Failed to load CLIP model from models/mmproj-Mistral-Medium-3.5-128B.gguf
Failed to load vision model from models/mmproj-Mistral-Medium-3.5-128B.gguf
```
With this fallback the model loads successfully.
Co-authored-by: Pascal <admin@serveurperso.com>
Resolves: https://github.com/ggml-org/llama.cpp/issues/22901
* convert : allow zero value for img_break_tok_id
This commit is contained in:
parent
838374375c
commit
f5636f8fc7
1 changed files with 25 additions and 1 deletions
|
|
@ -2176,7 +2176,8 @@ class MmprojModel(ModelBase):
|
|||
text_config = {
|
||||
k: v for k, v in self.hparams.items() if k not in ["vision_encoder", "audio_encoder"]
|
||||
}
|
||||
self.n_embd_text = text_config.get("hidden_dim", 0)
|
||||
# mistral native params.json: "dim" is the text hidden size ("hidden_dim" is the FFN intermediate size)
|
||||
self.n_embd_text = text_config.get("dim", 0)
|
||||
|
||||
assert self.n_embd_text > 0, "n_embd not found in hparams"
|
||||
|
||||
|
|
@ -3137,6 +3138,11 @@ class LlavaVisionModel(MmprojModel):
|
|||
assert self.hparams["norm_eps"] is not None, "norm_eps not found in params.json"
|
||||
if self.use_break_tok:
|
||||
self.img_break_tok_id = self.find_vparam(["image_break_token_id"])
|
||||
|
||||
# params.json may ship -1 placeholders (Mistral Medium 3.5)
|
||||
# resolve the real id from the bundled tokenizer in that case
|
||||
if self.img_break_tok_id < 0:
|
||||
self.img_break_tok_id = self.get_mistral_token_id("[IMG_BREAK]")
|
||||
else:
|
||||
raise ValueError(f"Unsupported model type: {self.hparams['model_type']}")
|
||||
logger.info(f"Image break token id: {self.img_break_tok_id}")
|
||||
|
|
@ -3156,6 +3162,24 @@ class LlavaVisionModel(MmprojModel):
|
|||
return int(token_data["id"])
|
||||
raise ValueError(f"Token '{token}' not found in tokenizer config.")
|
||||
|
||||
def get_mistral_token_id(self, token: str) -> int:
    """Return the id of a special token from the Mistral tokenizer files.

    The model directory (``self.dir_model``) is searched in order:

    1. ``tekken.json`` -- entries of ``special_tokens`` are matched on
       ``token_str`` and the id is taken from ``rank``.
    2. ``tokenizer.json`` -- entries of ``added_tokens`` are matched on
       ``content`` and the id is taken from ``id``.

    A file that is absent, or that does not list the token, is skipped and
    the next one is tried.

    Args:
        token: Exact token text to look up, e.g. ``"[IMG_BREAK]"``.

    Returns:
        The integer id of the first matching entry.

    Raises:
        ValueError: If no tokenizer file lists the token.
    """
    # (file name, key of the token list, key of the token text, key of the id)
    sources = (
        ("tekken.json", "special_tokens", "token_str", "rank"),
        ("tokenizer.json", "added_tokens", "content", "id"),
    )
    for file_name, list_key, text_key, id_key in sources:
        path = self.dir_model / file_name
        if not path.is_file():
            continue
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # `or []` also covers a key that is present but null; a plain
        # `.get(key, [])` would return None there and break the loop.
        for entry in data.get(list_key) or []:
            if entry.get(text_key) == token:
                return int(entry[id_key])
    raise ValueError(f"Token '{token}' not found in mistral tokenizer files.")
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
super().set_gguf_parameters()
|
||||
hparams = self.hparams
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue