diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index e5dea18ae..bf76fa406 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2176,7 +2176,8 @@ class MmprojModel(ModelBase): text_config = { k: v for k, v in self.hparams.items() if k not in ["vision_encoder", "audio_encoder"] } - self.n_embd_text = text_config.get("hidden_dim", 0) + # mistral native params.json: "dim" is the text hidden size ("hidden_dim" is the FFN intermediate size) + self.n_embd_text = text_config.get("dim", 0) assert self.n_embd_text > 0, "n_embd not found in hparams" @@ -3137,6 +3138,11 @@ class LlavaVisionModel(MmprojModel): assert self.hparams["norm_eps"] is not None, "norm_eps not found in params.json" if self.use_break_tok: self.img_break_tok_id = self.find_vparam(["image_break_token_id"]) + + # params.json may ship -1 placeholders (Mistral Medium 3.5) + # resolve the real id from the bundled tokenizer in that case + if self.img_break_tok_id < 0: + self.img_break_tok_id = self.get_mistral_token_id("[IMG_BREAK]") else: raise ValueError(f"Unsupported model type: {self.hparams['model_type']}") logger.info(f"Image break token id: {self.img_break_tok_id}") @@ -3156,6 +3162,24 @@ class LlavaVisionModel(MmprojModel): return int(token_data["id"]) raise ValueError(f"Token '{token}' not found in tokenizer config.") + def get_mistral_token_id(self, token: str) -> int: + # mistral native format ships tekken.json or a versioned spm tokenizer + tekken_file = self.dir_model / "tekken.json" + if tekken_file.is_file(): + with open(tekken_file, "r", encoding="utf-8") as f: + data = json.load(f) + for entry in data.get("special_tokens", []): + if entry.get("token_str") == token: + return int(entry["rank"]) + tokenizer_json_file = self.dir_model / "tokenizer.json" + if tokenizer_json_file.is_file(): + with open(tokenizer_json_file, "r", encoding="utf-8") as f: + data = json.load(f) + for entry in data.get("added_tokens", []): + if entry.get("content") == token: + return int(entry["id"]) + raise ValueError(f"Token '{token}' not found in mistral tokenizer files.") + def set_gguf_parameters(self): super().set_gguf_parameters() hparams = self.hparams