Add some built-in voices

This commit is contained in:
Concedo 2025-01-15 00:17:17 +08:00
parent 0a6ccda203
commit ed9f7a38ae
6 changed files with 94 additions and 43 deletions

View file

@ -41,35 +41,6 @@ static bool is_wav_buffer(const std::string buf) {
return true;
}
// Resamples a buffer of float samples from input_rate to output_rate using
// simple linear interpolation between neighbouring input samples.
//
// input       - source samples
// input_rate  - sample rate of `input`
// output_rate - desired sample rate of the result
//
// Returns a new buffer holding floor(input.size() * output_rate / input_rate)
// samples. An empty buffer is returned when `input` is empty or either rate
// is zero.
static std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate) {
    const size_t input_size = input.size();

    // A zero input rate would make the ratio infinite (or NaN for an empty
    // input), and converting such a value to size_t is undefined behaviour.
    // Treat every degenerate case as "no output samples" instead.
    const bool nothing_to_do = (input_size == 0 || input_rate == 0 || output_rate == 0);
    const double ratio = nothing_to_do ? 0.0 : static_cast<double>(output_rate) / input_rate;
    const size_t newLength = static_cast<size_t>(input_size * ratio);
    std::vector<float> output(newLength);

    if(whisperdebugmode==1)
    {
        printf("\nResample wav from %" PRIu32 " to %" PRIu32 " (in size: %zu, out size: %zu)",
            input_rate, output_rate, input_size, static_cast<std::size_t>(output.size()));
    }

    // Perform simple linear interpolation resampling.
    // newLength > 0 implies ratio > 0 and input_size > 0 inside this loop.
    for (size_t i = 0; i < newLength; ++i) {
        const double srcIndex = i / ratio;
        const size_t srcIndexInt = static_cast<size_t>(srcIndex);
        if (srcIndexInt + 1 < input_size) {
            const double frac = srcIndex - srcIndexInt;
            output[i] = static_cast<float>(input[srcIndexInt] * (1 - frac) + input[srcIndexInt + 1] * frac);
        } else {
            // No right-hand neighbour to blend with: hold the final sample.
            // Indexing the last element explicitly keeps the read in bounds
            // even if floating-point rounding pushes srcIndex past the end.
            output[i] = input[input_size - 1];
        }
    }
    return output;
}
static bool read_wav(const std::string & b64data, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo)
{
drwav wav;
@ -119,6 +90,11 @@ static bool read_wav(const std::string & b64data, std::vector<float>& pcmf32, st
}
if (wav.sampleRate != COMMON_SAMPLE_RATE) {
if(whisperdebugmode==1)
{
printf("\nResample wav from %" PRIu32 " to %" PRIu32 " (in size: %zu)",
wav.sampleRate, COMMON_SAMPLE_RATE, raw_pcm.size());
}
raw_pcm = resample_wav(raw_pcm, wav.sampleRate, COMMON_SAMPLE_RATE);
}