From aa58d1ed3b562c0278ce60ea94625aca25c8ae6e Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Tue, 24 Feb 2026 21:55:57 +0800 Subject: [PATCH] all working, but needs to optimize vram --- otherarch/acestep/ace-qwen3.cpp | 10 ++++++++++ otherarch/acestep/dit-vae.cpp | 19 +++++++++++++++---- otherarch/acestep/request.cpp | 4 ++-- otherarch/utils.cpp | 12 ++++++++++++ otherarch/utils.h | 1 + 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/otherarch/acestep/ace-qwen3.cpp b/otherarch/acestep/ace-qwen3.cpp index dc01d7c2d..c49943d34 100644 --- a/otherarch/acestep/ace-qwen3.cpp +++ b/otherarch/acestep/ace-qwen3.cpp @@ -1484,6 +1484,10 @@ std::string acestep_prepare_request(const music_generation_inputs inputs) ace.timesignature = req.timesignature; ace.vocal_language = req.vocal_language; + //kcpp: codes suck don't use them + req.thinking = false; + req.audio_codes = ""; + bool user_has_codes = !req.audio_codes.empty(); bool need_lm_codes = req.thinking && !user_has_codes; @@ -1578,6 +1582,12 @@ std::string acestep_prepare_request(const music_generation_inputs inputs) if (!batch_codes[0].empty()) rr.audio_codes = batch_codes[0]; rr.seed = seed; + std::string prefix_erase = "# Lyric"; + // Check if the string is long enough and starts with the prefix + if (rr.lyrics.size() >= prefix_erase.size() && rr.lyrics.compare(0, prefix_erase.size(), prefix_erase) == 0) { + rr.lyrics = rr.lyrics.substr(prefix_erase.size()); // Returns a new string starting after the prefix + } + //now convert to string std::ostringstream oss; oss << "{\n"; diff --git a/otherarch/acestep/dit-vae.cpp b/otherarch/acestep/dit-vae.cpp index 721093cbf..35627e6f5 100644 --- a/otherarch/acestep/dit-vae.cpp +++ b/otherarch/acestep/dit-vae.cpp @@ -692,6 +692,7 @@ std::string acestep_generate_audio(const music_generation_inputs inputs) if (req.caption.empty()) { req.caption = "An interesting song"; } + req.thinking = false; const int FRAMES_PER_SECOND = 25; int Oc = music_dit_cfg.out_channels; // 64 @@ -917,11 +918,21 @@ std::string acestep_generate_audio(const music_generation_inputs inputs) } } - // output wav - std::vector resampled_buf = resample_wav(audio,48000,24000); - std::string finalb64 = save_ulaw_wav8_base64(audio, 24000); + // std::string opath = "egghenlo.wav"; + // if (write_wav(opath.c_str(), audio.data(), T_audio, 48000)) { + // fprintf(stderr, "[VAE Batch%d] Wrote %s: %d samples (%.2fs @ 48kHz stereo)\n", + // b, opath.c_str(), T_audio, (float)T_audio / 48000.0f); + // } else { + // fprintf(stderr, "[VAE Batch%d] FATAL: failed to write %s\n", b, opath.c_str()); + // } - fprintf(stderr, "[Request Done]\n"); + // output wav + float muslen = (float)T_audio / 48000.0f; + std::vector mono = mix_planar_stereo_to_mono(audio.data(), T_audio); + std::vector resampled_buf = resample_wav(mono,48000,32000); + std::string finalb64 = save_wav16_base64(resampled_buf, 32000); + + fprintf(stderr, "[Request Done: Music Length %.2fs]\n",muslen); return finalb64; } diff --git a/otherarch/acestep/request.cpp b/otherarch/acestep/request.cpp index 7a57ca0e8..0d39c30cb 100644 --- a/otherarch/acestep/request.cpp +++ b/otherarch/acestep/request.cpp @@ -22,12 +22,12 @@ void request_init(AceRequest * r) { r->vocal_language = "unknown"; r->task_type = "text2music"; r->seed = -1; - r->thinking = true; + r->thinking = false; r->lm_temperature = 0.85f; r->lm_cfg_scale = 2.0f; r->lm_top_p = 0.9f; r->lm_top_k = 0; - r->lm_negative_prompt = "NO USER INPUT"; + r->lm_negative_prompt = ""; r->audio_codes = ""; r->inference_steps = 8; r->guidance_scale = 1.0f; diff --git a/otherarch/utils.cpp b/otherarch/utils.cpp index e311c7e90..77f5d7c11 100644 --- a/otherarch/utils.cpp +++ b/otherarch/utils.cpp @@ -389,6 +389,18 @@ std::vector resample_wav(const std::vector& input, uint32_t input_ return output; } +std::vector mix_planar_stereo_to_mono(const float* audio, int T_audio) +{ + std::vector mono(T_audio); + const float* left = audio; + const float* right = audio + T_audio; + for (int t = 0; t < T_audio; ++t) + { + mono[t] = 0.5f * (left[t] + right[t]); + } + return mono; +} + static uint8_t linear_to_mulaw(int16_t sample) { const int16_t BIAS = 0x84; // 132 diff --git a/otherarch/utils.h b/otherarch/utils.h index 4fccd78cb..f993aed5c 100644 --- a/otherarch/utils.h +++ b/otherarch/utils.h @@ -63,6 +63,7 @@ std::string kcpp_base64_encode(const std::string &data); std::string get_timestamp_str(); std::vector> split_big_vector(const std::vector& big_arr, size_t chunk_size); std::vector resample_wav(const std::vector& input, uint32_t input_rate, uint32_t output_rate); +std::vector mix_planar_stereo_to_mono(const float* audio, int T_audio); int32_t kcpp_quick_sample(float * logits, const int n_logits, const std::vector & last_n_tokens, float rep_pen, float top_p, int top_k, float temp, std::mt19937 & rng);