From 11a85d62fc3eb781c4095bf7f3fb47cb898cfb0b Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Tue, 24 Feb 2026 22:21:17 +0800
Subject: [PATCH] lowvram for music lm

---
 expose.h                            |  1 +
 koboldcpp.py                        |  8 ++++++-
 otherarch/acestep/ace-qwen3.cpp     | 36 +++++++++++++++++++++++++----
 otherarch/acestep/dit-vae.cpp       |  2 +-
 otherarch/acestep/music_adapter.cpp |  9 ++++++--
 5 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/expose.h b/expose.h
index 34fb51d2e..8de486d22 100644
--- a/expose.h
+++ b/expose.h
@@ -332,6 +332,7 @@ struct music_load_model_inputs
     const char * musicembedding_filename = nullptr;
     const char * musicdiffusion_filename = nullptr;
     const char * musicvae_filename = nullptr;
+    const bool lowvram = false;
     const char * executable_path = nullptr;
     const int kcpp_main_gpu = 0;
     const char * vulkan_info = nullptr;
diff --git a/koboldcpp.py b/koboldcpp.py
index 8898f0158..4949890dc 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -442,6 +442,7 @@ class music_load_model_inputs(ctypes.Structure):
                 ("musicembedding_filename", ctypes.c_char_p),
                 ("musicdiffusion_filename", ctypes.c_char_p),
                 ("musicvae_filename", ctypes.c_char_p),
+                ("lowvram", ctypes.c_bool),
                 ("executable_path", ctypes.c_char_p),
                 ("kcpp_main_gpu", ctypes.c_int),
                 ("vulkan_info", ctypes.c_char_p),
@@ -2371,6 +2372,7 @@ def music_load_model(musicllm,musicembedding,musicdiffusion,musicvae):
     inputs.musicembedding_filename = musicembedding.encode("UTF-8")
     inputs.musicdiffusion_filename = musicdiffusion.encode("UTF-8")
     inputs.musicvae_filename = musicvae.encode("UTF-8")
+    inputs.lowvram = True if args.musiclowvram else False
     inputs = set_backend_props(inputs)
     ret = handle.music_load_model(inputs)
     return ret
@@ -5778,6 +5780,7 @@ def show_gui():
     musicembeddings_var = ctk.StringVar()
     musicdiffusion_var = ctk.StringVar()
     musicvae_var = ctk.StringVar()
+    musiclowvram_var = ctk.IntVar(value=0)
 
     embeddings_model_var = ctk.StringVar()
     embeddings_ctx_var = ctk.StringVar(value=str(""))
@@ -6581,7 +6584,7 @@ def show_gui():
     makefileentry(audio_tab, "MusicEmbeds:", "Select music embedding model (e.g Qwen3-Embedding-0.6B)", musicembeddings_var, 32, width=280, singlerow=True, dialog_type=0, tooltiptxt="Select music embedding model (e.g Qwen3-Embedding-0.6B)")
     makefileentry(audio_tab, "MusicDiffuser:", "Select music diffusion (DiT) model (e.g acestep-v15-turbo)", musicdiffusion_var, 34, width=280, singlerow=True, dialog_type=0, tooltiptxt="Select music diffusion (DiT) model (e.g acestep-v15-turbo)")
     makefileentry(audio_tab, "MusicVAE:", "Select music VAE model", musicvae_var, 36, width=280, singlerow=True, dialog_type=0, tooltiptxt="Select music VAE model")
-
+    makecheckbox(audio_tab, "Music Low VRAM", musiclowvram_var, 38, 0,tooltiptxt="Unload music models when not in use.")
 
     admin_tab = tabcontent["Admin"]
     def toggleadmin(a,b,c):
@@ -6900,6 +6903,7 @@ def show_gui():
         args.musicembeddings = musicembeddings_var.get()
         args.musicdiffusion = musicdiffusion_var.get()
         args.musicvae = musicvae_var.get()
+        args.musiclowvram = musiclowvram_var.get()==1
 
         args.admin = (admin_var.get()==1 and not args.cli)
         args.admindir = admin_dir_var.get()
@@ -7147,6 +7151,7 @@ def show_gui():
         musicembeddings_var.set(dict["musicembeddings"] if ("musicembeddings" in dict and dict["musicembeddings"]) else "")
         musicdiffusion_var.set(dict["musicdiffusion"] if ("musicdiffusion" in dict and dict["musicdiffusion"]) else "")
         musicvae_var.set(dict["musicvae"] if ("musicvae" in dict and dict["musicvae"]) else "")
+        musiclowvram_var.set(dict["musiclowvram"] if ("musiclowvram" in dict) else 0)
 
         embeddings_model_var.set(dict["embeddingsmodel"] if ("embeddingsmodel" in dict and dict["embeddingsmodel"]) else "")
         embeddings_ctx_var.set(str(dict["embeddingsmaxctx"]) if ("embeddingsmaxctx" in dict and dict["embeddingsmaxctx"]) else "")
@@ -9278,6 +9283,7 @@ if __name__ == '__main__':
     musicparsergroup.add_argument("--musicembeddings", metavar=('[filename]'), help="Select music embedding model (e.g Qwen3-Embedding-0.6B)", default="")
     musicparsergroup.add_argument("--musicdiffusion", metavar=('[filename]'), help="Select music diffusion (DiT) model (e.g acestep-v15-turbo)", default="")
     musicparsergroup.add_argument("--musicvae", metavar=('[filename]'), help="Select music VAE model", default="")
+    musicparsergroup.add_argument("--musiclowvram", help="Unload music models when not in use", action='store_true')
 
     embeddingsparsergroup = parser.add_argument_group('Embeddings Model Commands')
     embeddingsparsergroup.add_argument("--embeddingsmodel", metavar=('[filename]'), help="Specify an embeddings model to be loaded for generating embedding vectors.", default="")
diff --git a/otherarch/acestep/ace-qwen3.cpp b/otherarch/acestep/ace-qwen3.cpp
index c49943d34..3fb3817af 100644
--- a/otherarch/acestep/ace-qwen3.cpp
+++ b/otherarch/acestep/ace-qwen3.cpp
@@ -1424,9 +1424,13 @@ int main(int argc, char ** argv) {
 static Qwen3LM acestep_llm;
 static BPETokenizer acestep_bpe;
 static bool acestep_lm_loaded = false;
+static std::string acestep_lm_path = "";
+static bool acestep_lm_lowvram = false;
 
-bool load_acestep_lm(std::string model_path)
+bool load_acestep_lm(std::string model_path, bool lowvram)
 {
+    acestep_lm_lowvram = lowvram;
+    acestep_lm_path = model_path;
     acestep_lm_loaded = false;
     int max_seq     = 8192;
     const int batch_size  = 1; //only bs 1 is allowed
@@ -1442,8 +1446,28 @@ bool load_acestep_lm(std::string model_path)
     return true;
 }
 
+// Release the music LM if it is currently loaded; otherwise do nothing.
+void unload_acestep_lm()
+{
+    if(!acestep_lm_loaded) { return; }
+    // Clearing the flag makes the next acestep_prepare_request reload from acestep_lm_path.
+    acestep_lm_loaded = false;
+    qw3lm_free(&acestep_llm);
+}
+
 std::string acestep_prepare_request(const music_generation_inputs inputs)
 {
+    if(!acestep_lm_loaded && acestep_lm_path!="")
+    {
+        printf("\nRuntime reload Music LM model...\n");
+        bool ok = load_acestep_lm(acestep_lm_path, acestep_lm_lowvram);
+        if(!ok)
+        {
+            printf("\nERROR: Acestep LM load fail\n");
+            return "";
+        }
+    }
+
     const int batch_size = 1;
     bool use_fsm = true;
     MetadataFSM fsm;
@@ -1614,10 +1638,12 @@ std::string acestep_prepare_request(const music_generation_inputs inputs)
     oss << "  \"audio_codes\": \"" << json_escape(rr.audio_codes) << "\"\n";
     oss << "}\n";
     std::string output_json = oss.str();
+
+    if(acestep_lm_lowvram)
+    {
+        unload_acestep_lm();
+    }
+
     return output_json;
 }
 
-void unload_acestep()
-{
-    qw3lm_free(&acestep_llm);
-}
diff --git a/otherarch/acestep/dit-vae.cpp b/otherarch/acestep/dit-vae.cpp
index 35627e6f5..5ee930080 100644
--- a/otherarch/acestep/dit-vae.cpp
+++ b/otherarch/acestep/dit-vae.cpp
@@ -587,7 +587,7 @@ static CondGGML music_cond = {};
 static std::vector<float> silence_full;  // [15000, 64] f32
 static DetokGGML detok = {};
 
-bool load_acestep_dit(std::string music_embd_path, std::string music_dit_path, std::string music_vae_path)
+bool load_acestep_dit(std::string music_embd_path, std::string music_dit_path, std::string music_vae_path, bool lowvram)
 {
     const char * text_enc_gguf = music_embd_path.c_str();
     const char * dit_gguf      = music_dit_path.c_str();
diff --git a/otherarch/acestep/music_adapter.cpp b/otherarch/acestep/music_adapter.cpp
index 4ed5a7d0a..5a0db3edc 100644
--- a/otherarch/acestep/music_adapter.cpp
+++ b/otherarch/acestep/music_adapter.cpp
@@ -52,17 +52,22 @@ bool musictype_load_model(const music_load_model_inputs inputs)
     std::string musicembedding_filename = inputs.musicembedding_filename;
     std::string musicdiffusion_filename = inputs.musicdiffusion_filename;
     std::string musicvae_filename = inputs.musicvae_filename;
+    bool lowvram = inputs.lowvram;
     printf("\nLoading Music Gen LLM Model: %s\nLoading Music Gen Embed Model: %s\nLoading Music Gen Diffusion Model: %s\nLoading Music Gen VAE Model: %s\n",
     musicllm_filename.c_str(),musicembedding_filename.c_str(),musicdiffusion_filename.c_str(),musicvae_filename.c_str());
     musicdebugmode = inputs.debugmode;
 
-    bool ok = load_acestep_lm(musicllm_filename);
+    bool ok = load_acestep_lm(musicllm_filename,lowvram);
     if (!ok) {
         printf("\nFailed to load Music Gen LM Model!\n");
         return false;
     }
+    if(lowvram)
+    {
+        unload_acestep_lm();
+    }
 
-    ok = load_acestep_dit(musicembedding_filename,musicdiffusion_filename,musicvae_filename);
+    ok = load_acestep_dit(musicembedding_filename,musicdiffusion_filename,musicvae_filename,lowvram);
     if (!ok) {
         printf("\nFailed to load Music Gen Diffusion, Embed or VAE Model!\n");
         return false;