stereo is working

This commit is contained in:
Concedo 2026-02-27 20:36:44 +08:00
parent 5a57ed8ca4
commit ba42f22fc8
7 changed files with 112 additions and 39 deletions

View file

@ -151,6 +151,14 @@ audio{width:100%;margin-top:6px;}
max-width:300px;
font-size:13px;
}
/* Checkbox inputs: pointer cursor, tick colour taken from the --accent custom property, 16px tall. */
input[type="checkbox"] {
cursor: pointer;
accent-color: var(--accent);
height: 16px;
}
</style>
</head>
@ -190,6 +198,13 @@ audio{width:100%;margin-top:6px;}
<div><label>CFG</label><input id="lm_cfg_scale" type="number" step="0.1"></div>
<div><label>Top P</label><input id="lm_top_p" type="number" step="0.01"></div>
<div><label>Steps</label><input id="inference_steps" type="number"></div>
<div><label>Guidance</label><input id="guidance_scale" type="number"></div>
<div><label>Shift</label><input id="shift" type="number"></div>
<div><label>HD Stereo</label><input id="stereo" type="checkbox"></div>
<div><label>Gen Codes</label><input id="gen_codes" type="checkbox"></div>
</div>
<div>
<div><label>AudioCodes</label><textarea id="audio_codes"></textarea></div>
</div>
</div>
@ -287,13 +302,21 @@ function toggleAdvanced(){
/**
 * Collects the current values of the generation form into a plain object.
 *
 * Every id in the list is looked up in the DOM; ids without a matching
 * element are skipped.
 *  - Checkboxes contribute a boolean taken from their `checked` state.
 *  - Any other control contributes its `value`: empty strings are omitted,
 *    numeric strings are converted to numbers, everything else stays a string.
 *
 * @returns {Object} map of element id -> boolean | number | string
 */
function getFormData(){
    const ids=["caption","lyrics","bpm","duration","keyscale","timesignature",
        "vocal_language","seed","lm_temperature","lm_cfg_scale","lm_top_p","inference_steps",
        "guidance_scale","shift","stereo","gen_codes","audio_codes"];
    const data={};
    ids.forEach(id=>{
        const el=document.getElementById(id);
        if(!el) return;
        if(el.type==="checkbox"){
            // A checkbox's .value is the constant "on" (unless a value attribute
            // is set) whether or not it is ticked; only .checked reflects state.
            data[id]=el.checked;
            return;
        }
        const v=el.value;
        if(v!=="") data[id]=isNaN(v)?v:Number(v);
    });
    return data;
}
@ -385,7 +408,7 @@ async function generateSong(){
}
}
function downloadTrackJSON(id){
function loadTrackJSON(id){
const tx = db.transaction(STORE, "readonly");
const store = tx.objectStore(STORE);
const req = store.get(id);
@ -397,17 +420,9 @@ function downloadTrackJSON(id){
return;
}
const blob = new Blob(
[JSON.stringify(item.params, null, 2)],
{ type: "application/json" }
);
const data=(item.params);
updateForm(data);
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
a.download = `${item.title}.json`;
a.click();
URL.revokeObjectURL(url);
};
req.onerror = function(){
@ -444,8 +459,8 @@ function loadLibrary(){
<a href="${url}" download="${item.title}.wav">
<button class="secondary">Download</button>
</a>
<button class="secondary" onclick="downloadTrackJSON(${item.id})">
Get JSON
<button class="secondary" onclick="loadTrackJSON(${item.id})">
Load Params
</button>
<button class="danger" onclick="deleteTrack(${item.id})">Delete</button>

View file

@ -342,13 +342,15 @@ struct music_load_model_inputs
};
// Request passed by value into the native music generation entry point.
// The Python side declares a ctypes.Structure with the same name, so the
// field order and types here have to stay in step with that declaration.
// NOTE(review): this listing shows both is_codes and is_planner_mode; is_codes
// looks like the pre-rename spelling of is_planner_mode - confirm that only
// one of the two exists in the actual header.
struct music_generation_inputs
{
const bool is_codes = false; //if true, generate codes, else, generate diffusion music
const bool is_planner_mode = false; //if true, generate codes, else, generate diffusion music
// When set, the audio path returns a 2-channel 48000 Hz WAV; otherwise the
// output is mixed down to mono and resampled to 32000 Hz.
const bool stereo = false;
// Filled from the request's 'gen_codes' parameter by the Python caller.
// NOTE(review): the native consumer of this flag is not visible here - confirm its effect.
const bool gen_codes = false;
// UTF-8 JSON text of the generation parameters (the Python caller passes json.dumps(genparams)).
const char * input_json = nullptr;
};
// Result returned by value from the native music generation entry point.
// The string members are set from c_str() of file-level static std::string
// buffers (or from string literals), so they are borrowed pointers that the
// caller must not free; presumably they are only valid until the next
// generation call overwrites those buffers - copy them out promptly.
// NOTE(review): this listing shows both codes_json and music_output_json;
// codes_json looks like the pre-rename spelling of music_output_json - confirm
// that only one of the two exists in the actual header.
struct music_generation_outputs
{
// Starts at -1; musictype_generate sets 1 on success and 0 on failure.
int status = -1;
const char * codes_json = "";
// JSON text produced in planner mode; "" on failure and in audio mode.
const char * music_output_json = "";
// Base64 text of the generated audio in audio mode; "" on failure and in planner mode.
const char * data = "";
};

View file

@ -453,12 +453,14 @@ class music_load_model_inputs(ctypes.Structure):
("debugmode", ctypes.c_int)]
class music_generation_inputs(ctypes.Structure):
_fields_ = [("is_codes", ctypes.c_bool),
_fields_ = [("is_planner_mode", ctypes.c_bool),
("stereo", ctypes.c_bool),
("gen_codes", ctypes.c_bool),
("input_json", ctypes.c_char_p)]
class music_generation_outputs(ctypes.Structure):
_fields_ = [("status", ctypes.c_int),
("codes_json", ctypes.c_char_p),
("music_output_json", ctypes.c_char_p),
("data", ctypes.c_char_p)]
class StdoutRedirector:
@ -2383,12 +2385,14 @@ def music_generate_codes(genparams):
global args
input_json = json.dumps(genparams)
inputs = music_generation_inputs()
inputs.is_codes = True
inputs.is_planner_mode = True
inputs.stereo = genparams.get('stereo', False)
inputs.gen_codes = genparams.get('gen_codes', False)
inputs.input_json = input_json.encode("UTF-8")
ret = handle.music_generate(inputs)
outstr = ""
if ret.status==1:
outstr = ret.codes_json.decode("UTF-8","ignore")
outstr = ret.music_output_json.decode("UTF-8","ignore")
outstr = json.dumps(json.loads(outstr))
return outstr
@ -2396,7 +2400,9 @@ def music_generate_audio(genparams):
global args
input_json = json.dumps(genparams)
inputs = music_generation_inputs()
inputs.is_codes = False
inputs.is_planner_mode = False
inputs.stereo = genparams.get('stereo', False)
inputs.gen_codes = genparams.get('gen_codes', False)
inputs.input_json = input_json.encode("UTF-8")
ret = handle.music_generate(inputs)
outstr = ""

View file

@ -868,8 +868,8 @@ std::string acestep_generate_audio(const music_generation_inputs inputs)
// Context building
// Silence latent for this T
std::vector<float> silence(Oc * T);
memcpy(silence.data(), silence_full.data(), (size_t)(Oc * T) * sizeof(float));
// std::vector<float> silence(Oc * T);
// memcpy(silence.data(), silence_full.data(), (size_t)(Oc * T) * sizeof(float));
// Decode audio codes if provided
int decoded_T = 0;
@ -895,7 +895,7 @@ std::string acestep_generate_audio(const music_generation_inputs inputs)
for (int t = 0; t < T; t++) {
const float * src = (t < decoded_T)
? decoded_latents.data() + t * Oc
: silence.data() + t * Oc;
: silence_full.data() + (t - decoded_T) * Oc;
for (int c = 0; c < Oc; c++)
context_single[t * ctx_ch + c] = src[c];
for (int c = 0; c < Oc; c++)
@ -984,9 +984,15 @@ std::string acestep_generate_audio(const music_generation_inputs inputs)
// output wav
float muslen = (float)T_audio / 48000.0f;
std::vector<float> mono = mix_planar_stereo_to_mono(audio.data(), T_audio);
std::vector<float> resampled_buf = resample_wav(mono,48000,32000);
std::string finalb64 = save_wav16_base64(resampled_buf, 32000);
std::string finalb64;
if(inputs.stereo)
{
finalb64 = save_stereo_wav16_base64(audio,T_audio,48000);
} else {
std::vector<float> mono = mix_planar_stereo_to_mono(audio.data(), T_audio);
std::vector<float> resampled_buf = resample_wav(mono,48000,32000);
finalb64 = save_wav16_base64(resampled_buf, 32000);
}
if(acestep_dit_lowvram)
{

View file

@ -25,7 +25,7 @@ static bool music_is_quiet = false;
static bool musicgen_loaded = false;
static std::string musicvulkandeviceenv;
static std::string codes_json_str = "";
static std::string music_output_json_str = "";
static std::string b64_music_output = "";
bool musictype_load_model(const music_load_model_inputs inputs)
@ -96,29 +96,29 @@ music_generation_outputs musictype_generate(const music_generation_inputs inputs
{
printf("\nWarning: KCPP music gen not initialized!\n");
output.status = 0;
output.codes_json = "";
output.music_output_json = "";
output.data = "";
return output;
}
if (inputs.is_codes) {
if (inputs.is_planner_mode) {
if (!music_is_quiet) {
printf("\nMusic Gen Generating Codes...");
}
codes_json_str = acestep_prepare_request(inputs);
if(codes_json_str=="")
music_output_json_str = acestep_prepare_request(inputs);
if(music_output_json_str=="")
{
printf("\nMusic codes generation failed!\n");
output.status = 0;
output.codes_json = "";
output.music_output_json = "";
output.data = "";
return output;
}
output.status = 1;
output.data = "";
output.codes_json = codes_json_str.c_str();
output.music_output_json = music_output_json_str.c_str();
if (!music_is_quiet) {
printf("\nMusic Gen Codes Done:\n%s\n",codes_json_str.c_str());
printf("\nMusic Gen Codes Done:\n%s\n",music_output_json_str.c_str());
}
} else {
if (!music_is_quiet) {
@ -129,13 +129,13 @@ music_generation_outputs musictype_generate(const music_generation_inputs inputs
{
printf("\nMusic audio generation failed!\n");
output.status = 0;
output.codes_json = "";
output.music_output_json = "";
output.data = "";
return output;
}
output.status = 1;
output.data = b64_music_output.c_str();
output.codes_json = "";
output.music_output_json = "";
if (!music_is_quiet) {
printf("\nMusic Gen Audio Done\n");
}

View file

@ -487,6 +487,49 @@ std::string save_wav16_base64(const std::vector<float> &data, int sample_rate) {
return kcpp_base64_encode(wav_data); //return as base64 string
}
//assumes planar stereo input from acestep
// Encodes planar stereo float audio as a 16-bit PCM WAV file and returns the
// whole file as a base64 string.
//   raw_audio   - planar samples: left channel in [0, T_audio),
//                 right channel in [T_audio, 2*T_audio)
//   T_audio     - frames per channel; also the offset of the right-channel
//                 plane inside raw_audio
//   sample_rate - sample rate in Hz, copied into the WAV header
// If raw_audio holds fewer than 2*T_audio samples (or T_audio is not positive)
// only the frames present in BOTH channel planes are written, so the buffer is
// never read out of bounds and the header sizes always match the payload.
std::string save_stereo_wav16_base64(const std::vector<float> & raw_audio, int T_audio, int sample_rate) {
    const uint32_t n_channels = 2;
    const uint32_t bits = 16;
    const uint32_t block_align = n_channels * (bits / 8);
    const uint32_t header_size = 44; // RIFF(12) + fmt chunk(24) + data chunk header(8)

    // Clamp the frame count to what both planes actually contain.
    size_t n_frames = 0;
    if (T_audio > 0 && raw_audio.size() > static_cast<size_t>(T_audio)) {
        n_frames = std::min(static_cast<size_t>(T_audio),
                            raw_audio.size() - static_cast<size_t>(T_audio));
    }
    // RIFF size fields are 32-bit, so the payload plus the 36 header bytes
    // counted in the RIFF chunk size must fit in a uint32_t.
    const size_t max_frames = static_cast<size_t>((UINT32_MAX - 36u) / block_align);
    n_frames = std::min(n_frames, max_frames);
    const size_t stride = (n_frames > 0) ? static_cast<size_t>(T_audio) : 0;

    const uint32_t data_size = static_cast<uint32_t>(n_frames) * block_align;
    const uint32_t file_size = 36u + data_size;
    const uint32_t rate = static_cast<uint32_t>(sample_rate);
    const uint32_t byte_rate = rate * block_align;

    std::string wav_data;
    wav_data.reserve(static_cast<size_t>(header_size) + data_size);

    // WAV is little-endian by definition; serialise byte by byte so the output
    // does not depend on the host's integer width or byte order.
    auto put_u16 = [&wav_data](uint16_t v) {
        wav_data.push_back(static_cast<char>(v & 0xFF));
        wav_data.push_back(static_cast<char>((v >> 8) & 0xFF));
    };
    auto put_u32 = [&wav_data](uint32_t v) {
        wav_data.push_back(static_cast<char>(v & 0xFF));
        wav_data.push_back(static_cast<char>((v >> 8) & 0xFF));
        wav_data.push_back(static_cast<char>((v >> 16) & 0xFF));
        wav_data.push_back(static_cast<char>((v >> 24) & 0xFF));
    };

    wav_data.append("RIFF", 4);
    put_u32(file_size);
    wav_data.append("WAVE", 4);

    wav_data.append("fmt ", 4);
    put_u32(16);                                  // fmt chunk size for PCM
    put_u16(1);                                   // audio format: PCM
    put_u16(static_cast<uint16_t>(n_channels));
    put_u32(rate);
    put_u32(byte_rate);
    put_u16(static_cast<uint16_t>(block_align));
    put_u16(static_cast<uint16_t>(bits));

    wav_data.append("data", 4);
    put_u32(data_size);

    // EXPECTS PLANAR INPUT:
    // raw_audio[0 ... T_audio-1] = Left
    // raw_audio[T_audio ... 2*T_audio-1] = Right
    // Output is interleaved L,R per frame.
    for (size_t t = 0; t < n_frames; ++t) {
        for (size_t c = 0; c < n_channels; ++c) {
            float s = raw_audio[c * stride + t];
            s = std::max(-1.0f, std::min(1.0f, s)); // clamp to [-1, 1]
            const int16_t v = static_cast<int16_t>(s * 32767.0f);
            put_u16(static_cast<uint16_t>(v));
        }
    }

    return kcpp_base64_encode(wav_data);
}
//a very rudimentary all in one sampling function which has no dependencies
int32_t kcpp_quick_sample(float * logits, const int n_logits, const std::vector<int32_t> & last_n_tokens, float rep_pen, float top_p, int top_k, float temp, std::mt19937 & rng)
{

View file

@ -156,4 +156,5 @@ struct wav_ulaw_header {
#pragma pack(pop)
std::string save_ulaw_wav8_base64(const std::vector<float> &data, int sample_rate);
std::string save_wav16_base64(const std::vector<float> &data, int sample_rate);
std::string save_wav16_base64(const std::vector<float> &data, int sample_rate);
std::string save_stereo_wav16_base64(const std::vector<float> & raw_audio, int T_audio, int sample_rate);