All working, but VRAM usage still needs to be optimized

2026-05-08 09:59:50 +00:00 · 2026-02-24 21:55:57 +08:00 · 2026-02-24 21:55:57 +08:00 · aa58d1ed3b
commit aa58d1ed3b
parent 488c431331
5 changed files with 40 additions and 6 deletions
--- a/otherarch/acestep/ace-qwen3.cpp
+++ b/otherarch/acestep/ace-qwen3.cpp
@ -1484,6 +1484,10 @@ std::string acestep_prepare_request(const music_generation_inputs inputs)
    ace.timesignature  = req.timesignature;
    ace.vocal_language = req.vocal_language;

+    //kcpp: codes suck don't use them
+    req.thinking = false;
+    req.audio_codes = "";
+
    bool user_has_codes = !req.audio_codes.empty();
    bool need_lm_codes  = req.thinking && !user_has_codes;

@ -1578,6 +1582,12 @@ std::string acestep_prepare_request(const music_generation_inputs inputs)
    if (!batch_codes[0].empty()) rr.audio_codes = batch_codes[0];
    rr.seed = seed;

+    std::string prefix_erase = "# Lyric";
+    // Check if the string is long enough and starts with the prefix
+    if (rr.lyrics.size() >= prefix_erase.size() && rr.lyrics.compare(0, prefix_erase.size(), prefix_erase) == 0) {
+        rr.lyrics = rr.lyrics.substr(prefix_erase.size()); // Returns a new string starting after the prefix
+    }
+
    //now convert to string
    std::ostringstream oss;
    oss << "{\n";
--- a/otherarch/acestep/dit-vae.cpp
+++ b/otherarch/acestep/dit-vae.cpp
@ -692,6 +692,7 @@ std::string acestep_generate_audio(const music_generation_inputs inputs)
    if (req.caption.empty()) {
        req.caption = "An interesting song";
    }
+    req.thinking = false;

    const int FRAMES_PER_SECOND = 25;
    int Oc = music_dit_cfg.out_channels;          // 64
@ -917,11 +918,21 @@ std::string acestep_generate_audio(const music_generation_inputs inputs)
        }
    }

-    // output wav
-    std::vector<float> resampled_buf = resample_wav(audio,48000,24000);
-    std::string finalb64 = save_ulaw_wav8_base64(audio, 24000);
+    // std::string opath = "egghenlo.wav";
+    // if (write_wav(opath.c_str(), audio.data(), T_audio, 48000)) {
+    //     fprintf(stderr, "[VAE Batch%d] Wrote %s: %d samples (%.2fs @ 48kHz stereo)\n",
+    //             b, opath.c_str(), T_audio, (float)T_audio / 48000.0f);
+    // } else {
+    //     fprintf(stderr, "[VAE Batch%d] FATAL: failed to write %s\n", b, opath.c_str());
+    // }

-    fprintf(stderr, "[Request Done]\n");
+    // output wav
+    float muslen = (float)T_audio / 48000.0f;
+    std::vector<float> mono = mix_planar_stereo_to_mono(audio.data(), T_audio);
+    std::vector<float> resampled_buf = resample_wav(mono,48000,32000);
+    std::string finalb64 = save_wav16_base64(resampled_buf, 32000);
+
+    fprintf(stderr, "[Request Done: Music Length %.2fs]\n",muslen);
    return finalb64;
 }

--- a/otherarch/acestep/request.cpp
+++ b/otherarch/acestep/request.cpp
@ -22,12 +22,12 @@ void request_init(AceRequest * r) {
    r->vocal_language     = "unknown";
    r->task_type          = "text2music";
    r->seed               = -1;
-    r->thinking           = true;
+    r->thinking           = false;
    r->lm_temperature     = 0.85f;
    r->lm_cfg_scale       = 2.0f;
    r->lm_top_p           = 0.9f;
    r->lm_top_k           = 0;
-    r->lm_negative_prompt = "NO USER INPUT";
+    r->lm_negative_prompt = "";
    r->audio_codes        = "";
    r->inference_steps    = 8;
    r->guidance_scale     = 1.0f;
--- a/otherarch/utils.cpp
+++ b/otherarch/utils.cpp
@ -389,6 +389,18 @@ std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_
    return output;
 }

+std::vector<float> mix_planar_stereo_to_mono(const float* audio, int T_audio) // Averages two planar channels into one mono buffer of T_audio samples.
+{ // Reads audio[0 .. 2*T_audio), so the caller must supply at least that many floats.
+    std::vector<float> mono(T_audio); // one output sample per frame; NOTE(review): a negative T_audio would convert to a huge size here - confirm callers never pass one
+    const float* left  = audio; // first channel occupies samples [0, T_audio)
+    const float* right = audio + T_audio; // second channel is stored directly after the first: [T_audio, 2*T_audio)
+    for (int t = 0; t < T_audio; ++t)
+    {
+        mono[t] = 0.5f * (left[t] + right[t]); // equal-weight mean of the two channels
+    }
+    return mono;
+}
+
 static uint8_t linear_to_mulaw(int16_t sample)
 {
    const int16_t BIAS = 0x84;        // 132
--- a/otherarch/utils.h
+++ b/otherarch/utils.h
@ -63,6 +63,7 @@ std::string kcpp_base64_encode(const std::string &data);
 std::string get_timestamp_str();
 std::vector<std::vector<int>> split_big_vector(const std::vector<int>& big_arr, size_t chunk_size);
 std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate);
+std::vector<float> mix_planar_stereo_to_mono(const float* audio, int T_audio);

 int32_t kcpp_quick_sample(float * logits, const int n_logits, const std::vector<int32_t> & last_n_tokens, float rep_pen, float top_p, int top_k, float temp, std::mt19937 & rng);