diff --git a/otherarch/acestep/dit-vae.cpp b/otherarch/acestep/dit-vae.cpp
index 65481ddad..a33c4b5c0 100644
--- a/otherarch/acestep/dit-vae.cpp
+++ b/otherarch/acestep/dit-vae.cpp
@@ -764,7 +764,7 @@ std::string acestep_generate_audio(const music_generation_inputs inputs)
     const char * language = req.vocal_language.empty() ? "en" : req.vocal_language.c_str();
     float duration = req.duration > 0 ? req.duration : 60.0f;
     long long seed = req.seed;
-    int num_steps = req.inference_steps > 0 ? req.inference_steps : 8;
+    int num_steps = req.inference_steps > 0 ? req.inference_steps : 10;
     float guidance_scale = req.guidance_scale > 0 ? req.guidance_scale : 7.0f;
     float shift = req.shift > 0 ? req.shift : 1.0f;
 
diff --git a/otherarch/acestep/request.cpp b/otherarch/acestep/request.cpp
index eae6ea952..9825b8cdd 100644
--- a/otherarch/acestep/request.cpp
+++ b/otherarch/acestep/request.cpp
@@ -29,7 +29,7 @@ void request_init(AceRequest * r) {
     r->lm_top_k = 0;
     r->lm_negative_prompt = "";
     r->audio_codes = "";
-    r->inference_steps = 8;
+    r->inference_steps = 10;
     r->guidance_scale = 1.0f;
     r->shift = 3.0f;
 }
diff --git a/otherarch/utils.cpp b/otherarch/utils.cpp
index 77f5d7c11..3ada28deb 100644
--- a/otherarch/utils.cpp
+++ b/otherarch/utils.cpp
@@ -366,26 +366,33 @@ std::vector<std::vector<int>> split_big_vector(const std::vector<int>& big_arr,
     return small_arrs;
 }
 
-std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate) {
+std::vector<float> resample_wav(const std::vector<float> & input, uint32_t input_rate, uint32_t output_rate) {
+    if (input.empty() || input_rate == 0 || output_rate == 0)
+        return {};
 
-    size_t input_size = input.size();
-
-    double ratio = static_cast<double>(output_rate) / input_rate;
-    size_t newLength = static_cast<size_t>(input.size() * ratio);
-    std::vector<float> output(newLength);
-
-    // Perform simple linear interpolation resampling
-    for (size_t i = 0; i < newLength; ++i) {
-        double srcIndex = i / ratio;
-        size_t srcIndexInt = static_cast<size_t>(srcIndex);
-        double frac = srcIndex - srcIndexInt;
-        if (srcIndexInt + 1 < input_size) {
-            output[i] = static_cast<float>(input[srcIndexInt] * (1 - frac) + input[srcIndexInt + 1] * frac);
-        } else {
-            output[i] = input[srcIndexInt];
+    const size_t input_size = input.size();
+    const double ratio = static_cast<double>(output_rate) / input_rate; // Compute resampling ratio
+    // Use rounding to avoid systematic truncation error
+    const size_t output_size = static_cast<size_t>(std::llround(input_size * ratio));
+    std::vector<float> output(output_size);
+    const double step = static_cast<double>(input_rate) / output_rate; // Precompute step in source domain
+    double src_pos = 0.0;
+    for (size_t i = 0; i < output_size; ++i)
+    {
+        size_t idx = static_cast<size_t>(src_pos);
+        if (idx >= input_size - 1) // Clamp to valid range (prevents out-of-bounds)
+        {
+            output[i] = input[input_size - 1];
         }
+        else
+        {
+            const double frac = src_pos - idx;
+            const float s0 = input[idx];
+            const float s1 = input[idx + 1];
+            output[i] = static_cast<float>(s0 + (s1 - s0) * frac);
+        }
+        src_pos += step;
     }
-
     return output;
 }
 