mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 09:59:50 +00:00
All working, but VRAM usage still needs to be optimized
This commit is contained in:
parent
488c431331
commit
aa58d1ed3b
5 changed files with 40 additions and 6 deletions
|
|
@ -1484,6 +1484,10 @@ std::string acestep_prepare_request(const music_generation_inputs inputs)
|
|||
ace.timesignature = req.timesignature;
|
||||
ace.vocal_language = req.vocal_language;
|
||||
|
||||
//kcpp: codes suck don't use them
|
||||
req.thinking = false;
|
||||
req.audio_codes = "";
|
||||
|
||||
bool user_has_codes = !req.audio_codes.empty();
|
||||
bool need_lm_codes = req.thinking && !user_has_codes;
|
||||
|
||||
|
|
@ -1578,6 +1582,12 @@ std::string acestep_prepare_request(const music_generation_inputs inputs)
|
|||
if (!batch_codes[0].empty()) rr.audio_codes = batch_codes[0];
|
||||
rr.seed = seed;
|
||||
|
||||
std::string prefix_erase = "# Lyric";
|
||||
// Check if the string is long enough and starts with the prefix
|
||||
if (rr.lyrics.size() >= prefix_erase.size() && rr.lyrics.compare(0, prefix_erase.size(), prefix_erase) == 0) {
|
||||
rr.lyrics = rr.lyrics.substr(prefix_erase.size()); // Returns a new string starting after the prefix
|
||||
}
|
||||
|
||||
//now convert to string
|
||||
std::ostringstream oss;
|
||||
oss << "{\n";
|
||||
|
|
|
|||
|
|
@ -692,6 +692,7 @@ std::string acestep_generate_audio(const music_generation_inputs inputs)
|
|||
if (req.caption.empty()) {
|
||||
req.caption = "An interesting song";
|
||||
}
|
||||
req.thinking = false;
|
||||
|
||||
const int FRAMES_PER_SECOND = 25;
|
||||
int Oc = music_dit_cfg.out_channels; // 64
|
||||
|
|
@ -917,11 +918,21 @@ std::string acestep_generate_audio(const music_generation_inputs inputs)
|
|||
}
|
||||
}
|
||||
|
||||
// output wav
|
||||
std::vector<float> resampled_buf = resample_wav(audio,48000,24000);
|
||||
std::string finalb64 = save_ulaw_wav8_base64(audio, 24000);
|
||||
// std::string opath = "egghenlo.wav";
|
||||
// if (write_wav(opath.c_str(), audio.data(), T_audio, 48000)) {
|
||||
// fprintf(stderr, "[VAE Batch%d] Wrote %s: %d samples (%.2fs @ 48kHz stereo)\n",
|
||||
// b, opath.c_str(), T_audio, (float)T_audio / 48000.0f);
|
||||
// } else {
|
||||
// fprintf(stderr, "[VAE Batch%d] FATAL: failed to write %s\n", b, opath.c_str());
|
||||
// }
|
||||
|
||||
fprintf(stderr, "[Request Done]\n");
|
||||
// output wav
|
||||
float muslen = (float)T_audio / 48000.0f;
|
||||
std::vector<float> mono = mix_planar_stereo_to_mono(audio.data(), T_audio);
|
||||
std::vector<float> resampled_buf = resample_wav(mono,48000,32000);
|
||||
std::string finalb64 = save_wav16_base64(resampled_buf, 32000);
|
||||
|
||||
fprintf(stderr, "[Request Done: Music Length %.2fs]\n",muslen);
|
||||
return finalb64;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,12 +22,12 @@ void request_init(AceRequest * r) {
|
|||
r->vocal_language = "unknown";
|
||||
r->task_type = "text2music";
|
||||
r->seed = -1;
|
||||
r->thinking = true;
|
||||
r->thinking = false;
|
||||
r->lm_temperature = 0.85f;
|
||||
r->lm_cfg_scale = 2.0f;
|
||||
r->lm_top_p = 0.9f;
|
||||
r->lm_top_k = 0;
|
||||
r->lm_negative_prompt = "NO USER INPUT";
|
||||
r->lm_negative_prompt = "";
|
||||
r->audio_codes = "";
|
||||
r->inference_steps = 8;
|
||||
r->guidance_scale = 1.0f;
|
||||
|
|
|
|||
|
|
@ -389,6 +389,18 @@ std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_
|
|||
return output;
|
||||
}
|
||||
|
||||
// Down-mixes planar stereo audio to a single mono channel by averaging.
//
// audio   : pointer to 2 * T_audio floats; the first T_audio values are read as
//           the left channel and the next T_audio values as the right channel.
// T_audio : number of samples per channel.
// returns : T_audio mono samples, each 0.5f * (left + right). An empty vector
//           is returned when audio is null or T_audio is not positive.
std::vector<float> mix_planar_stereo_to_mono(const float* audio, int T_audio)
{
    // Guard: a negative count would be implicitly converted to a huge size_t by
    // the vector constructor (throwing), and a null buffer would be dereferenced.
    if (audio == nullptr || T_audio <= 0)
    {
        return {};
    }

    const size_t n_samples = static_cast<size_t>(T_audio);
    const float* left = audio;
    const float* right = audio + n_samples;

    std::vector<float> mono(n_samples);
    for (size_t t = 0; t < n_samples; ++t)
    {
        mono[t] = 0.5f * (left[t] + right[t]);
    }
    return mono;
}
|
||||
|
||||
static uint8_t linear_to_mulaw(int16_t sample)
|
||||
{
|
||||
const int16_t BIAS = 0x84; // 132
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ std::string kcpp_base64_encode(const std::string &data);
|
|||
std::string get_timestamp_str();
|
||||
std::vector<std::vector<int>> split_big_vector(const std::vector<int>& big_arr, size_t chunk_size);
|
||||
std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate);
|
||||
std::vector<float> mix_planar_stereo_to_mono(const float* audio, int T_audio);
|
||||
|
||||
int32_t kcpp_quick_sample(float * logits, const int n_logits, const std::vector<int32_t> & last_n_tokens, float rep_pen, float top_p, int top_k, float temp, std::mt19937 & rng);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue