From 11a85d62fc3eb781c4095bf7f3fb47cb898cfb0b Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Tue, 24 Feb 2026 22:21:17 +0800 Subject: [PATCH] lowvram for music lm --- expose.h | 1 + koboldcpp.py | 8 ++++++- otherarch/acestep/ace-qwen3.cpp | 36 +++++++++++++++++++++++++---- otherarch/acestep/dit-vae.cpp | 2 +- otherarch/acestep/music_adapter.cpp | 9 ++++++-- 5 files changed, 47 insertions(+), 9 deletions(-) diff --git a/expose.h b/expose.h index 34fb51d2e..8de486d22 100644 --- a/expose.h +++ b/expose.h @@ -332,6 +332,7 @@ struct music_load_model_inputs const char * musicembedding_filename = nullptr; const char * musicdiffusion_filename = nullptr; const char * musicvae_filename = nullptr; + const bool lowvram = false; const char * executable_path = nullptr; const int kcpp_main_gpu = 0; const char * vulkan_info = nullptr; diff --git a/koboldcpp.py b/koboldcpp.py index 8898f0158..4949890dc 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -442,6 +442,7 @@ class music_load_model_inputs(ctypes.Structure): ("musicembedding_filename", ctypes.c_char_p), ("musicdiffusion_filename", ctypes.c_char_p), ("musicvae_filename", ctypes.c_char_p), + ("lowvram", ctypes.c_bool), ("executable_path", ctypes.c_char_p), ("kcpp_main_gpu", ctypes.c_int), ("vulkan_info", ctypes.c_char_p), @@ -2371,6 +2372,7 @@ def music_load_model(musicllm,musicembedding,musicdiffusion,musicvae): inputs.musicembedding_filename = musicembedding.encode("UTF-8") inputs.musicdiffusion_filename = musicdiffusion.encode("UTF-8") inputs.musicvae_filename = musicvae.encode("UTF-8") + inputs.lowvram = True if args.musiclowvram else False inputs = set_backend_props(inputs) ret = handle.music_load_model(inputs) return ret @@ -5778,6 +5780,7 @@ def show_gui(): musicembeddings_var = ctk.StringVar() musicdiffusion_var = ctk.StringVar() musicvae_var = ctk.StringVar() + musiclowvram_var = ctk.IntVar(value=0) embeddings_model_var = ctk.StringVar() embeddings_ctx_var = ctk.StringVar(value=str("")) @@ -6581,7 +6584,7 @@ def show_gui(): makefileentry(audio_tab, "MusicEmbeds:", "Select music embedding model (e.g Qwen3-Embedding-0.6B)", musicembeddings_var, 32, width=280, singlerow=True, dialog_type=0, tooltiptxt="Select music embedding model (e.g Qwen3-Embedding-0.6B)") makefileentry(audio_tab, "MusicDiffuser:", "Select music diffusion (DiT) model (e.g acestep-v15-turbo)", musicdiffusion_var, 34, width=280, singlerow=True, dialog_type=0, tooltiptxt="Select music diffusion (DiT) model (e.g acestep-v15-turbo)") makefileentry(audio_tab, "MusicVAE:", "Select music VAE model", musicvae_var, 36, width=280, singlerow=True, dialog_type=0, tooltiptxt="Select music VAE model") - + makecheckbox(audio_tab, "Music Low VRAM", musiclowvram_var, 38, 0,tooltiptxt="Unload music models when not in use.") admin_tab = tabcontent["Admin"] def toggleadmin(a,b,c): @@ -6900,6 +6903,7 @@ def show_gui(): args.musicembeddings = musicembeddings_var.get() args.musicdiffusion = musicdiffusion_var.get() args.musicvae = musicvae_var.get() + args.musiclowvram = musiclowvram_var.get()==1 args.admin = (admin_var.get()==1 and not args.cli) args.admindir = admin_dir_var.get() @@ -7147,6 +7151,7 @@ def show_gui(): musicembeddings_var.set(dict["musicembeddings"] if ("musicembeddings" in dict and dict["musicembeddings"]) else "") musicdiffusion_var.set(dict["musicdiffusion"] if ("musicdiffusion" in dict and dict["musicdiffusion"]) else "") musicvae_var.set(dict["musicvae"] if ("musicvae" in dict and dict["musicvae"]) else "") + musiclowvram_var.set(dict["musiclowvram"] if ("musiclowvram" in dict) else 0) embeddings_model_var.set(dict["embeddingsmodel"] if ("embeddingsmodel" in dict and dict["embeddingsmodel"]) else "") embeddings_ctx_var.set(str(dict["embeddingsmaxctx"]) if ("embeddingsmaxctx" in dict and dict["embeddingsmaxctx"]) else "") @@ -9278,6 +9283,7 @@ if __name__ == '__main__': musicparsergroup.add_argument("--musicembeddings", metavar=('[filename]'), help="Select music embedding model (e.g Qwen3-Embedding-0.6B)", default="") musicparsergroup.add_argument("--musicdiffusion", metavar=('[filename]'), help="Select music diffusion (DiT) model (e.g acestep-v15-turbo)", default="") musicparsergroup.add_argument("--musicvae", metavar=('[filename]'), help="Select music VAE model", default="") + musicparsergroup.add_argument("--musiclowvram", help="Unload music models when not in use", action='store_true') embeddingsparsergroup = parser.add_argument_group('Embeddings Model Commands') embeddingsparsergroup.add_argument("--embeddingsmodel", metavar=('[filename]'), help="Specify an embeddings model to be loaded for generating embedding vectors.", default="") diff --git a/otherarch/acestep/ace-qwen3.cpp b/otherarch/acestep/ace-qwen3.cpp index c49943d34..3fb3817af 100644 --- a/otherarch/acestep/ace-qwen3.cpp +++ b/otherarch/acestep/ace-qwen3.cpp @@ -1424,9 +1424,13 @@ int main(int argc, char ** argv) { static Qwen3LM acestep_llm; static BPETokenizer acestep_bpe; static bool acestep_lm_loaded = false; +static std::string acestep_lm_path = ""; +static bool acestep_lm_lowvram = false; -bool load_acestep_lm(std::string model_path) +bool load_acestep_lm(std::string model_path, bool lowvram) { + acestep_lm_lowvram = lowvram; + acestep_lm_path = model_path; acestep_lm_loaded = false; int max_seq = 8192; const int batch_size = 1; //only bs 1 is allowed @@ -1442,8 +1446,28 @@ bool load_acestep_lm(std::string model_path) return true; } +void unload_acestep_lm() +{ + if(acestep_lm_loaded) + { + acestep_lm_loaded = false; + qw3lm_free(&acestep_llm); + } +} + std::string acestep_prepare_request(const music_generation_inputs inputs) { + if(!acestep_lm_loaded && acestep_lm_path!="") + { + printf("\nRuntime reload Music LM model...\n"); + bool ok = load_acestep_lm(acestep_lm_path, acestep_lm_lowvram); + if(!ok) + { + printf("\nERROR: Acestep LM load fail\n"); + return ""; + } + } + const int batch_size = 1; bool use_fsm = true; MetadataFSM fsm; @@ -1614,10 +1638,12 @@ std::string acestep_prepare_request(const music_generation_inputs inputs) oss << " \"audio_codes\": \"" << json_escape(rr.audio_codes) << "\"\n"; oss << "}\n"; std::string output_json = oss.str(); + + if(acestep_lm_lowvram) + { + unload_acestep_lm(); + } + return output_json; } -void unload_acestep() -{ - qw3lm_free(&acestep_llm); -} diff --git a/otherarch/acestep/dit-vae.cpp b/otherarch/acestep/dit-vae.cpp index 35627e6f5..5ee930080 100644 --- a/otherarch/acestep/dit-vae.cpp +++ b/otherarch/acestep/dit-vae.cpp @@ -587,7 +587,7 @@ static CondGGML music_cond = {}; static std::vector silence_full; // [15000, 64] f32 static DetokGGML detok = {}; -bool load_acestep_dit(std::string music_embd_path, std::string music_dit_path, std::string music_vae_path) +bool load_acestep_dit(std::string music_embd_path, std::string music_dit_path, std::string music_vae_path, bool lowvram) { const char * text_enc_gguf = music_embd_path.c_str(); const char * dit_gguf = music_dit_path.c_str(); diff --git a/otherarch/acestep/music_adapter.cpp b/otherarch/acestep/music_adapter.cpp index 4ed5a7d0a..5a0db3edc 100644 --- a/otherarch/acestep/music_adapter.cpp +++ b/otherarch/acestep/music_adapter.cpp @@ -52,17 +52,22 @@ bool musictype_load_model(const music_load_model_inputs inputs) std::string musicembedding_filename = inputs.musicembedding_filename; std::string musicdiffusion_filename = inputs.musicdiffusion_filename; std::string musicvae_filename = inputs.musicvae_filename; + bool lowvram = inputs.lowvram; printf("\nLoading Music Gen LLM Model: %s\nLoading Music Gen Embed Model: %s\nLoading Music Gen Diffusion Model: %s\nLoading Music Gen VAE Model: %s\n", musicllm_filename.c_str(),musicembedding_filename.c_str(),musicdiffusion_filename.c_str(),musicvae_filename.c_str()); musicdebugmode = inputs.debugmode; - bool ok = load_acestep_lm(musicllm_filename); + bool ok = load_acestep_lm(musicllm_filename,lowvram); if (!ok) { printf("\nFailed to load Music Gen LM Model!\n"); return false; } + if(lowvram) + { + unload_acestep_lm(); + } - ok = load_acestep_dit(musicembedding_filename,musicdiffusion_filename,musicvae_filename); + ok = load_acestep_dit(musicembedding_filename,musicdiffusion_filename,musicvae_filename,lowvram); if (!ok) { printf("\nFailed to load Music Gen Diffusion, Embed or VAE Model!\n"); return false;