mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-06-01 06:00:36 +00:00
mtmd: fix gemma 4 audio rms norm eps (#23815)
* mtmd: fix gemma 4 audio rms norm eps * Update tools/mtmd/clip.cpp Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> --------- Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
parent
dd1557907a
commit
d6be3158e1
2 changed files with 9 additions and 5 deletions
|
|
@ -786,14 +786,15 @@ class Gemma4VisionAudioModel(MmprojModel):
|
|||
super().set_gguf_parameters()
|
||||
|
||||
# vision params
|
||||
assert self.hparams_vision is not None
|
||||
self.gguf_writer.add_clip_vision_projector_type(gguf.VisionProjectorType.GEMMA4V)
|
||||
self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams.get("layer_norm_eps", 1e-6))
|
||||
self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams_vision.get("layer_norm_eps", 1e-6))
|
||||
|
||||
# audio params
|
||||
if self.hparams_audio:
|
||||
self.gguf_writer.add_clip_audio_projector_type(gguf.VisionProjectorType.GEMMA4A)
|
||||
self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["feat_in"])
|
||||
self.gguf_writer.add_audio_attention_layernorm_eps(1e-5)
|
||||
assert self.hparams_audio is not None
|
||||
self.gguf_writer.add_clip_audio_projector_type(gguf.VisionProjectorType.GEMMA4A)
|
||||
self.gguf_writer.add_audio_num_mel_bins(self.hparams_audio["feat_in"])
|
||||
self.gguf_writer.add_audio_attention_layernorm_eps(self.hparams_audio.get("layer_norm_eps", 1e-6))
|
||||
|
||||
def is_audio_tensor(self, name: str) -> bool:
|
||||
return "audio_tower" in name or "embed_audio" in name
|
||||
|
|
|
|||
|
|
@ -1552,6 +1552,9 @@ struct clip_model_loader {
|
|||
hparams.audio_n_fft = 512;
|
||||
hparams.audio_window_len = 320; // 20ms frame (NOT 25ms/400)
|
||||
hparams.audio_hop_len = 160;
|
||||
// due to a mistake in the original conversion code, rms_norm_eps is set to a wrong value
|
||||
// since all gemma4a models use 1e-6, we just hardcode it here to avoid re-conversion
|
||||
hparams.eps = 1e-6f;
|
||||
} break;
|
||||
case PROJECTOR_TYPE_GRANITE_SPEECH:
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue