mtmd : Adding support for Nvidia Music Flamingo Model (#18470)

* Initial commit, debugging q5_k_s quant * Made hf_to_gguf extend whisper to reduce code duplication * Addressed convert_hf_to_gguf pull request issue --------- Co-authored-by: Henry D <henrydorsey147@gmail.com>
2026-04-28 03:30:20 +00:00 · 2025-12-31 06:13:23 -05:00 · 2025-12-31 06:13:23 -05:00 · 9b8329de7a
commit 9b8329de7a
parent 9a6369bb60
7 changed files with 49 additions and 3 deletions
--- a/tools/mtmd/mtmd.cpp
+++ b/tools/mtmd/mtmd.cpp
@@ -330,6 +330,7 @@ struct mtmd_context {
            case PROJECTOR_TYPE_ULTRAVOX:
            case PROJECTOR_TYPE_VOXTRAL:
            case PROJECTOR_TYPE_GLMA:
+            case PROJECTOR_TYPE_MUSIC_FLAMINGO:
                audio_preproc = std::make_unique<mtmd_audio_preprocessor_whisper>(ctx_a);
                break;
            case PROJECTOR_TYPE_LFM2A:
@@ -352,6 +353,9 @@ struct mtmd_context {
            // [BEGIN_AUDIO] ... (embeddings) ...
            aud_beg = "[BEGIN_AUDIO]";

+        } else if (proj == PROJECTOR_TYPE_MUSIC_FLAMINGO) {
+            // <sound> ... (embeddings) ...
+            aud_beg = "<sound>";
        }
    }