lowvram for music lm

2026-06-02 07:19:23 +00:00 · 2026-02-24 22:21:17 +08:00 · 2026-02-24 22:21:17 +08:00 · 11a85d62fc
commit 11a85d62fc
parent aa58d1ed3b
5 changed files with 47 additions and 9 deletions
--- a/expose.h
+++ b/expose.h
@ -332,6 +332,7 @@ struct music_load_model_inputs
    const char * musicembedding_filename = nullptr;
    const char * musicdiffusion_filename = nullptr;
    const char * musicvae_filename = nullptr;
+    const bool lowvram = false;
    const char * executable_path = nullptr;
    const int kcpp_main_gpu = 0;
    const char * vulkan_info = nullptr;
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -442,6 +442,7 @@ class music_load_model_inputs(ctypes.Structure):
                ("musicembedding_filename", ctypes.c_char_p),
                ("musicdiffusion_filename", ctypes.c_char_p),
                ("musicvae_filename", ctypes.c_char_p),
+                ("lowvram", ctypes.c_bool),
                ("executable_path", ctypes.c_char_p),
                ("kcpp_main_gpu", ctypes.c_int),
                ("vulkan_info", ctypes.c_char_p),
@ -2371,6 +2372,7 @@ def music_load_model(musicllm,musicembedding,musicdiffusion,musicvae):
    inputs.musicembedding_filename = musicembedding.encode("UTF-8")
    inputs.musicdiffusion_filename = musicdiffusion.encode("UTF-8")
    inputs.musicvae_filename = musicvae.encode("UTF-8")
+    inputs.lowvram = True if args.musiclowvram else False
    inputs = set_backend_props(inputs)
    ret = handle.music_load_model(inputs)
    return ret
@ -5778,6 +5780,7 @@ def show_gui():
    musicembeddings_var = ctk.StringVar()
    musicdiffusion_var = ctk.StringVar()
    musicvae_var = ctk.StringVar()
+    musiclowvram_var = ctk.IntVar(value=0)

    embeddings_model_var = ctk.StringVar()
    embeddings_ctx_var = ctk.StringVar(value=str(""))
@ -6581,7 +6584,7 @@ def show_gui():
    makefileentry(audio_tab, "MusicEmbeds:", "Select music embedding model (e.g Qwen3-Embedding-0.6B)", musicembeddings_var, 32, width=280, singlerow=True, dialog_type=0, tooltiptxt="Select music embedding model (e.g Qwen3-Embedding-0.6B)")
    makefileentry(audio_tab, "MusicDiffuser:", "Select music diffusion (DiT) model (e.g acestep-v15-turbo)", musicdiffusion_var, 34, width=280, singlerow=True, dialog_type=0, tooltiptxt="Select music diffusion (DiT) model (e.g acestep-v15-turbo)")
    makefileentry(audio_tab, "MusicVAE:", "Select music VAE model", musicvae_var, 36, width=280, singlerow=True, dialog_type=0, tooltiptxt="Select music VAE model")
-
+    makecheckbox(audio_tab, "Music Low VRAM", musiclowvram_var, 38, 0,tooltiptxt="Unload music models when not in use.")

    admin_tab = tabcontent["Admin"]
    def toggleadmin(a,b,c):
@ -6900,6 +6903,7 @@ def show_gui():
        args.musicembeddings = musicembeddings_var.get()
        args.musicdiffusion = musicdiffusion_var.get()
        args.musicvae = musicvae_var.get()
+        args.musiclowvram = musiclowvram_var.get()==1

        args.admin = (admin_var.get()==1 and not args.cli)
        args.admindir = admin_dir_var.get()
@ -7147,6 +7151,7 @@ def show_gui():
        musicembeddings_var.set(dict["musicembeddings"] if ("musicembeddings" in dict and dict["musicembeddings"]) else "")
        musicdiffusion_var.set(dict["musicdiffusion"] if ("musicdiffusion" in dict and dict["musicdiffusion"]) else "")
        musicvae_var.set(dict["musicvae"] if ("musicvae" in dict and dict["musicvae"]) else "")
+        musiclowvram_var.set(dict["musiclowvram"] if ("musiclowvram" in dict) else 0)

        embeddings_model_var.set(dict["embeddingsmodel"] if ("embeddingsmodel" in dict and dict["embeddingsmodel"]) else "")
        embeddings_ctx_var.set(str(dict["embeddingsmaxctx"]) if ("embeddingsmaxctx" in dict and dict["embeddingsmaxctx"]) else "")
@ -9278,6 +9283,7 @@ if __name__ == '__main__':
    musicparsergroup.add_argument("--musicembeddings", metavar=('[filename]'), help="Select music embedding model (e.g Qwen3-Embedding-0.6B)", default="")
    musicparsergroup.add_argument("--musicdiffusion", metavar=('[filename]'), help="Select music diffusion (DiT) model (e.g acestep-v15-turbo)", default="")
    musicparsergroup.add_argument("--musicvae", metavar=('[filename]'), help="Select music VAE model", default="")
+    musicparsergroup.add_argument("--musiclowvram", help="Unload music models when not in use", action='store_true')

    embeddingsparsergroup = parser.add_argument_group('Embeddings Model Commands')
    embeddingsparsergroup.add_argument("--embeddingsmodel", metavar=('[filename]'), help="Specify an embeddings model to be loaded for generating embedding vectors.", default="")
--- a/otherarch/acestep/ace-qwen3.cpp
+++ b/otherarch/acestep/ace-qwen3.cpp
@ -1424,9 +1424,13 @@ int main(int argc, char ** argv) {
 static Qwen3LM acestep_llm;
 static BPETokenizer acestep_bpe;
 static bool acestep_lm_loaded = false;
+static std::string acestep_lm_path = "";
+static bool acestep_lm_lowvram = false;

-bool load_acestep_lm(std::string model_path)
+bool load_acestep_lm(std::string model_path, bool lowvram)
 {
+    acestep_lm_lowvram = lowvram;
+    acestep_lm_path = model_path;
    acestep_lm_loaded = false;
    int max_seq     = 8192;
    const int batch_size  = 1; //only bs 1 is allowed
@ -1442,8 +1446,28 @@ bool load_acestep_lm(std::string model_path)
    return true;
 }

+void unload_acestep_lm() // Frees the music LM via qw3lm_free() if it is currently marked loaded; otherwise does nothing.
+{
+    if(acestep_lm_loaded) // guard: skip the free when nothing is loaded, so a repeated call is a no-op
+    {
+        acestep_lm_loaded = false; // cleared so acestep_prepare_request() reloads from acestep_lm_path on its next call
+        qw3lm_free(&acestep_llm);
+    }
+}
+
 std::string acestep_prepare_request(const music_generation_inputs inputs)
 {
+    if(!acestep_lm_loaded && acestep_lm_path!="")
+    {
+        printf("\nRuntime reload Music LM model...\n");
+        bool ok = load_acestep_lm(acestep_lm_path, acestep_lm_lowvram);
+        if(!ok)
+        {
+            printf("\nERROR: Acestep LM load fail\n");
+            return "";
+        }
+    }
+
    const int batch_size = 1;
    bool use_fsm = true;
    MetadataFSM fsm;
@ -1614,10 +1638,12 @@ std::string acestep_prepare_request(const music_generation_inputs inputs)
    oss << "  \"audio_codes\": \"" << json_escape(rr.audio_codes) << "\"\n";
    oss << "}\n";
    std::string output_json = oss.str();
+
+    if(acestep_lm_lowvram)
+    {
+        unload_acestep_lm();
+    }
+
    return output_json;
 }

-void unload_acestep()
-{
-    qw3lm_free(&acestep_llm);
-}
--- a/otherarch/acestep/dit-vae.cpp
+++ b/otherarch/acestep/dit-vae.cpp
@ -587,7 +587,7 @@ static CondGGML music_cond = {};
 static std::vector<float> silence_full;  // [15000, 64] f32
 static DetokGGML detok = {};

-bool load_acestep_dit(std::string music_embd_path, std::string music_dit_path, std::string music_vae_path)
+bool load_acestep_dit(std::string music_embd_path, std::string music_dit_path, std::string music_vae_path, bool lowvram)
 {
    const char * text_enc_gguf = music_embd_path.c_str();
    const char * dit_gguf      = music_dit_path.c_str();
--- a/otherarch/acestep/music_adapter.cpp
+++ b/otherarch/acestep/music_adapter.cpp
@ -52,17 +52,22 @@ bool musictype_load_model(const music_load_model_inputs inputs)
    std::string musicembedding_filename = inputs.musicembedding_filename;
    std::string musicdiffusion_filename = inputs.musicdiffusion_filename;
    std::string musicvae_filename = inputs.musicvae_filename;
+    bool lowvram = inputs.lowvram;
    printf("\nLoading Music Gen LLM Model: %s\nLoading Music Gen Embed Model: %s\nLoading Music Gen Diffusion Model: %s\nLoading Music Gen VAE Model: %s\n",
    musicllm_filename.c_str(),musicembedding_filename.c_str(),musicdiffusion_filename.c_str(),musicvae_filename.c_str());
    musicdebugmode = inputs.debugmode;

-    bool ok = load_acestep_lm(musicllm_filename);
+    bool ok = load_acestep_lm(musicllm_filename,lowvram);
    if (!ok) {
        printf("\nFailed to load Music Gen LM Model!\n");
        return false;
    }
+    if(lowvram)
+    {
+        unload_acestep_lm();
+    }

-    ok = load_acestep_dit(musicembedding_filename,musicdiffusion_filename,musicvae_filename);
+    ok = load_acestep_dit(musicembedding_filename,musicdiffusion_filename,musicvae_filename,lowvram);
    if (!ok) {
        printf("\nFailed to load Music Gen Diffusion, Embed or VAE Model!\n");
        return false;