mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
add some built in voices
This commit is contained in:
parent
0a6ccda203
commit
ed9f7a38ae
6 changed files with 94 additions and 43 deletions
29
klite.embd
29
klite.embd
|
@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
|
|||
-->
|
||||
|
||||
<script>
|
||||
const LITEVER = 202;
|
||||
const LITEVER = 203;
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
var localflag = true;
|
||||
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
|
||||
|
@ -11938,6 +11938,7 @@ initializeInstructUIFunctionality();
|
|||
}else{
|
||||
document.getElementById("nokcpptts").classList.remove("hidden");
|
||||
}
|
||||
adjust_kcpptts_controls();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11981,6 +11982,15 @@ initializeInstructUIFunctionality();
|
|||
rvcPitch.disabled = streamingMode;
|
||||
}
|
||||
|
||||
// Shows the free-text voice name box only while the KoboldCpp TTS voice
// dropdown is set to "custom"; for every other choice the box is hidden.
function adjust_kcpptts_controls() {
	const wants_custom_voice = (document.getElementById("kcpp_tts_voice").value == "custom");
	const custom_voice_box = document.getElementById("kcpp_tts_voice_custom");
	// Second argument forces the state: true adds "hidden", false removes it.
	custom_voice_box.classList.toggle("hidden", !wants_custom_voice);
}
|
||||
|
||||
// Update set_xtts_url to use the new fetch_rvc_voices function
|
||||
function set_xtts_url() {
|
||||
let is_xtts = (document.getElementById("ttsselect").value==XTTS_ID);
|
||||
|
@ -12073,7 +12083,7 @@ initializeInstructUIFunctionality();
|
|||
payload =
|
||||
{
|
||||
"input": text,
|
||||
"voice": document.getElementById("kcpp_tts_voice").value
|
||||
"voice": (document.getElementById("kcpp_tts_voice").value == "custom")?document.getElementById("kcpp_tts_voice_custom").value:document.getElementById("kcpp_tts_voice").value
|
||||
};
|
||||
ttsheaders = get_kobold_header();
|
||||
}
|
||||
|
@ -14496,7 +14506,7 @@ initializeInstructUIFunctionality();
|
|||
gentxt = trim_extra_stop_seqs(gentxt,false);
|
||||
|
||||
//fix alpaca leakage
|
||||
if(localsettings.fix_alpaca_leak && get_instruct_starttag(true).toLowerCase().includes("### instruction"))
|
||||
if(localsettings.fix_alpaca_leak && (localsettings.opmode == 2 || localsettings.opmode == 3 || localsettings.opmode == 4) && get_instruct_starttag(true).toLowerCase().includes("### instruction"))
|
||||
{
|
||||
let matches = gentxt.match(/\n### (.+?):/g);
|
||||
for(let m in matches)
|
||||
|
@ -20249,7 +20259,18 @@ initializeInstructUIFunctionality();
|
|||
<div class="color_red hidden" id="nokcpptts">KoboldCpp Not Connected</div>
|
||||
<div class="settinglabel">
|
||||
<table width="100%">
|
||||
<tr style="font-size:12px;padding:2px;margin:0px 0 0;"><td>TTS Voice </td><td><input class="settinglabel miniinput" type="text" value="kobo" placeholder="(Anything)" id="kcpp_tts_voice" style="margin-left:3px; height:18px; width: 80px; padding: 2px;"></td></tr>
|
||||
<tr style="font-size:12px;padding:2px;margin:0px 0 0;"><td>TTS Voice </td><td>
|
||||
<select onchange="adjust_kcpptts_controls();" class="form-control" id="kcpp_tts_voice" style="font-size:12px;height:20px;padding:0;margin:0px 0 0;">
|
||||
<option value="kobo" selected>kobo</option>
|
||||
<option value="cheery">cheery</option>
|
||||
<option value="sleepy">sleepy</option>
|
||||
<option value="tutor">tutor</option>
|
||||
<option value="shouty">shouty</option>
|
||||
<option value="bored">bored</option>
|
||||
<option value="record">record</option>
|
||||
<option value="custom">custom</option>
|
||||
</select></td>
|
||||
<td><input class="settinglabel miniinput" type="text" value="" placeholder="(Name)" id="kcpp_tts_voice_custom" style="margin-left:3px; height:18px; width:44px; padding: 2px;"></td></tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -1356,8 +1356,10 @@ def tts_generate(genparams):
|
|||
prompt = prompt.strip()
|
||||
voice = 1
|
||||
voicestr = genparams.get("voice", genparams.get("speaker_wav", ""))
|
||||
if voicestr and voicestr.strip().lower()=="kobo":
|
||||
voice = 1
|
||||
voice_mapping = ["kobo","cheery","sleepy","tutor","shouty","bored","record"]
|
||||
normalized_voice = voicestr.strip().lower() if voicestr else ""
|
||||
if normalized_voice in voice_mapping:
|
||||
voice = voice_mapping.index(normalized_voice) + 1
|
||||
else:
|
||||
voice = simple_lcg_hash(voicestr.strip()) if voicestr else 1
|
||||
inputs = tts_generation_inputs()
|
||||
|
@ -2320,7 +2322,7 @@ Enter Prompt:<br>
|
|||
response_body = (json.dumps([]).encode())
|
||||
|
||||
elif self.path.endswith(('/speakers_list')): #xtts compatible
|
||||
response_body = (json.dumps(["kobo","bean","corn","spicy","lime","fire","metal","potato"]).encode()) #some random voices for them to enjoy
|
||||
response_body = (json.dumps(["kobo","cheery","sleepy","tutor","shouty","bored","record"]).encode()) #some random voices for them to enjoy
|
||||
|
||||
elif self.path.endswith(('/api/tags')): #ollama compatible
|
||||
response_body = (json.dumps({"models":[{"name":"koboldcpp","model":friendlymodelname,"modified_at":"2024-07-19T15:26:55.6122841+08:00","size":394998579,"digest":"b5dc5e784f2a3ee1582373093acf69a2f4e2ac1710b253a001712b86a61f88bb","details":{"parent_model":"","format":"gguf","family":"koboldcpp","families":["koboldcpp"],"parameter_size":"128M","quantization_level":"Q4_0"}}]}).encode())
|
||||
|
|
|
@ -599,8 +599,32 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
|
|||
{
|
||||
printf("\nReuse speaker ID=%d (%d tokens)...", last_speaker_seed, last_speaker_codes.size());
|
||||
}
|
||||
} else if (speaker_seed==1){ //1 is a special seed
|
||||
std::string speaker = "but<|t_0.31|><|code_start|><|1023|><|1474|><|17|><|121|><|1362|><|744|><|438|><|1319|><|744|><|1419|><|1246|><|923|><|1338|><|406|><|939|><|975|><|1491|><|965|><|1212|><|248|><|794|><|464|><|830|><|code_end|>\nthat<|t_0.13|><|code_start|><|1578|><|1773|><|660|><|1074|><|221|><|1803|><|142|><|914|><|798|><|485|><|code_end|>\nis<|t_0.11|><|code_start|><|737|><|794|><|1288|><|182|><|895|><|1653|><|448|><|471|><|code_end|>\nwhat<|t_0.12|><|code_start|><|1734|><|1306|><|779|><|490|><|525|><|1028|><|37|><|1633|><|1353|><|code_end|>\nit<|t_0.09|><|code_start|><|1343|><|898|><|270|><|1035|><|94|><|1409|><|388|><|code_end|>\nis<|t_0.23|><|code_start|><|694|><|695|><|577|><|692|><|1047|><|388|><|28|><|905|><|1155|><|50|><|1629|><|1775|><|1711|><|1729|><|404|><|1027|><|344|><|code_end|>";
|
||||
} else if (speaker_seed>=1 && speaker_seed<=7){ //special seeds
|
||||
std::string speaker = "";
|
||||
switch(speaker_seed)
|
||||
{
|
||||
case 1:
|
||||
speaker = "but<|t_0.31|><|code_start|><|1023|><|1474|><|17|><|121|><|1362|><|744|><|438|><|1319|><|744|><|1419|><|1246|><|923|><|1338|><|406|><|939|><|975|><|1491|><|965|><|1212|><|248|><|794|><|464|><|830|><|code_end|>\nthat<|t_0.13|><|code_start|><|1578|><|1773|><|660|><|1074|><|221|><|1803|><|142|><|914|><|798|><|485|><|code_end|>\nis<|t_0.11|><|code_start|><|737|><|794|><|1288|><|182|><|895|><|1653|><|448|><|471|><|code_end|>\nwhat<|t_0.12|><|code_start|><|1734|><|1306|><|779|><|490|><|525|><|1028|><|37|><|1633|><|1353|><|code_end|>\nit<|t_0.09|><|code_start|><|1343|><|898|><|270|><|1035|><|94|><|1409|><|388|><|code_end|>\nis<|t_0.23|><|code_start|><|694|><|695|><|577|><|692|><|1047|><|388|><|28|><|905|><|1155|><|50|><|1629|><|1775|><|1711|><|1729|><|404|><|1027|><|344|><|code_end|>";
|
||||
break;
|
||||
case 2:
|
||||
speaker = "but<|t_0.23|><|code_start|><|762|><|612|><|316|><|1128|><|171|><|250|><|1765|><|60|><|1075|><|81|><|1159|><|140|><|81|><|1158|><|678|><|1639|><|970|><|code_end|>\nthat<|t_0.21|><|code_start|><|1254|><|460|><|378|><|1621|><|1477|><|210|><|270|><|571|><|179|><|324|><|408|><|81|><|642|><|408|><|794|><|1506|><|code_end|>\nis<|t_0.16|><|code_start|><|36|><|57|><|1132|><|881|><|844|><|260|><|79|><|1794|><|1195|><|333|><|1808|><|1375|><|code_end|>\nwhat<|t_0.23|><|code_start|><|485|><|1583|><|1091|><|736|><|668|><|1703|><|670|><|832|><|959|><|853|><|983|><|969|><|576|><|697|><|721|><|1032|><|990|><|code_end|>\nit<|t_0.16|><|code_start|><|772|><|741|><|794|><|1015|><|110|><|965|><|1060|><|62|><|1305|><|470|><|284|><|259|><|code_end|>\nis<|t_0.35|><|code_start|><|516|><|1099|><|405|><|1831|><|1051|><|1471|><|26|><|1207|><|809|><|0|><|1303|><|1329|><|1196|><|798|><|679|><|992|><|1358|><|930|><|1065|><|942|><|1573|><|823|><|823|><|1527|><|1617|><|865|><|code_end|>";
|
||||
break;
|
||||
case 3:
|
||||
speaker = "but<|t_0.32|><|code_start|><|862|><|899|><|1601|><|1749|><|121|><|1176|><|1601|><|1007|><|1722|><|121|><|1142|><|1465|><|696|><|1284|><|1698|><|1275|><|860|><|113|><|590|><|1356|><|577|><|1346|><|1433|><|1779|><|code_end|>\nthat<|t_0.40|><|code_start|><|1248|><|1181|><|1792|><|735|><|1289|><|1346|><|975|><|1751|><|1587|><|1042|><|221|><|29|><|991|><|797|><|1184|><|1171|><|152|><|352|><|1119|><|1282|><|110|><|73|><|524|><|1424|><|1276|><|996|><|777|><|1119|><|1166|><|859|><|code_end|>\nis<|t_0.61|><|code_start|><|1666|><|1819|><|566|><|1333|><|1658|><|981|><|1705|><|1185|><|939|><|1813|><|899|><|1465|><|1176|><|712|><|1390|><|1578|><|1275|><|92|><|1729|><|1200|><|1615|><|1484|><|1200|><|1574|><|1307|><|1221|><|1606|><|1307|><|428|><|1759|><|1127|><|1574|><|1581|><|127|><|1507|><|1060|><|1769|><|34|><|1583|><|1579|><|1828|><|1580|><|652|><|1688|><|1527|><|1547|><|code_end|>\nwhat<|t_0.93|><|code_start|><|1691|><|731|><|1592|><|1573|><|1547|><|1617|><|1528|><|1547|><|1664|><|867|><|1571|><|1637|><|273|><|1354|><|1573|><|34|><|1724|><|1669|><|1538|><|1293|><|1623|><|1536|><|1233|><|1176|><|1348|><|1011|><|1722|><|899|><|1176|><|1419|><|899|><|1763|><|1293|><|1601|><|1543|><|939|><|1543|><|1419|><|799|><|1722|><|1233|><|1011|><|1543|><|1007|><|1176|><|1628|><|1114|><|1763|><|862|><|957|><|1693|><|274|><|1176|><|1719|><|805|><|1706|><|1472|><|1249|><|1365|><|877|><|269|><|197|><|1068|><|969|><|1591|><|1192|><|996|><|1764|><|1455|><|1643|><|code_end|>\nit<|t_0.15|><|code_start|><|804|><|1141|><|1566|><|1013|><|529|><|1650|><|1149|><|1744|><|763|><|1640|><|1692|><|code_end|>\nis<|t_0.40|><|code_start|><|1218|><|774|><|1576|><|1192|><|286|><|1831|><|1407|><|92|><|803|><|1311|><|26|><|546|><|1124|><|978|><|319|><|1062|><|1675|><|1608|><|1158|><|1456|><|1572|><|1199|><|1603|><|1592|><|1664|><|1586|><|1571|><|1354|><|34|><|1627|><|code_end|>";
|
||||
break;
|
||||
case 4:
|
||||
speaker = "but<|t_0.24|><|code_start|><|710|><|505|><|555|><|1255|><|1474|><|1315|><|1740|><|530|><|1446|><|1651|><|991|><|186|><|1310|><|816|><|175|><|935|><|776|><|672|><|code_end|>\nthat<|t_0.40|><|code_start|><|1440|><|807|><|712|><|1525|><|177|><|584|><|1006|><|1288|><|1664|><|1732|><|951|><|79|><|797|><|790|><|172|><|1111|><|106|><|1222|><|186|><|186|><|1122|><|1153|><|81|><|1055|><|1355|><|1757|><|861|><|1067|><|971|><|563|><|code_end|>\nis<|t_0.36|><|code_start|><|915|><|396|><|869|><|1779|><|805|><|1489|><|1157|><|1142|><|1011|><|555|><|686|><|1578|><|1428|><|1624|><|1252|><|949|><|175|><|239|><|154|><|1280|><|716|><|1729|><|1445|><|1791|><|1679|><|1769|><|884|><|code_end|>\nwhat<|t_0.36|><|code_start|><|1710|><|1734|><|1364|><|1789|><|1805|><|1628|><|1025|><|859|><|1595|><|987|><|136|><|1584|><|635|><|1006|><|1789|><|552|><|871|><|1505|><|1206|><|474|><|705|><|803|><|1305|><|1595|><|627|><|1137|><|486|><|code_end|>\nit<|t_0.47|><|code_start|><|676|><|1746|><|1672|><|1465|><|1346|><|673|><|957|><|1293|><|1348|><|1628|><|710|><|1233|><|1628|><|727|><|1338|><|1536|><|673|><|686|><|1273|><|1114|><|1523|><|1338|><|1510|><|273|><|1487|><|1656|><|1573|><|1786|><|813|><|1284|><|1442|><|17|><|325|><|975|><|555|><|code_end|>\nis<|t_0.47|><|code_start|><|1747|><|1419|><|1465|><|1538|><|17|><|862|><|1419|><|986|><|1628|><|1157|><|933|><|1176|><|939|><|899|><|625|><|939|><|1085|><|101|><|1224|><|1744|><|1777|><|1462|><|176|><|1618|><|972|><|1623|><|1580|><|1252|><|1479|><|1702|><|1802|><|895|><|1673|><|1510|><|1513|><|code_end|>";
|
||||
break;
|
||||
case 5:
|
||||
speaker = "but<|t_0.20|><|code_start|><|686|><|1288|><|1251|><|1428|><|481|><|702|><|1812|><|829|><|81|><|756|><|76|><|104|><|952|><|1723|><|1632|><|code_end|>\nthat<|t_0.20|><|code_start|><|1006|><|1067|><|1614|><|1810|><|887|><|43|><|1192|><|106|><|400|><|43|><|730|><|660|><|186|><|87|><|467|><|code_end|>\nis<|t_0.27|><|code_start|><|648|><|1625|><|9|><|685|><|243|><|106|><|996|><|990|><|228|><|809|><|1009|><|2|><|806|><|1325|><|1332|><|1766|><|202|><|725|><|416|><|822|><|code_end|>\nwhat<|t_0.36|><|code_start|><|1287|><|328|><|1241|><|1661|><|1651|><|1708|><|1740|><|1685|><|1715|><|1787|><|1381|><|197|><|1769|><|525|><|1000|><|234|><|364|><|115|><|212|><|632|><|1153|><|228|><|73|><|1002|><|1800|><|1277|><|1117|><|code_end|>\nit<|t_0.40|><|code_start|><|1830|><|1199|><|1282|><|1163|><|1195|><|1752|><|1092|><|1481|><|1003|><|513|><|1639|><|1805|><|1485|><|1645|><|195|><|1464|><|181|><|195|><|123|><|87|><|433|><|878|><|170|><|1265|><|375|><|1708|><|1739|><|1519|><|1185|><|1099|><|code_end|>\nis<|t_0.76|><|code_start|><|1748|><|1422|><|276|><|1337|><|1322|><|1519|><|1779|><|1067|><|1724|><|891|><|1205|><|1419|><|1144|><|1667|><|591|><|1003|><|1543|><|566|><|1390|><|426|><|1824|><|182|><|1138|><|52|><|129|><|1056|><|155|><|1056|><|1298|><|919|><|155|><|125|><|500|><|1022|><|571|><|315|><|400|><|100|><|617|><|295|><|757|><|324|><|592|><|1298|><|1310|><|57|><|876|><|1175|><|1353|><|1770|><|1649|><|1828|><|1637|><|362|><|1744|><|884|><|1027|><|code_end|>";
|
||||
break;
|
||||
case 6:
|
||||
speaker = "but<|t_0.39|><|code_start|><|1338|><|1319|><|805|><|1176|><|799|><|591|><|325|><|1023|><|274|><|1348|><|1246|><|1176|><|591|><|555|><|758|><|591|><|438|><|710|><|727|><|1419|><|1157|><|1157|><|1293|><|633|><|1003|><|832|><|871|><|1399|><|1315|><|code_end|>\nthat<|t_0.20|><|code_start|><|1352|><|668|><|859|><|1793|><|1455|><|260|><|1117|><|260|><|186|><|1209|><|106|><|1098|><|260|><|1088|><|752|><|code_end|>\nis<|t_0.17|><|code_start|><|949|><|869|><|352|><|821|><|475|><|788|><|1150|><|1286|><|1079|><|1726|><|328|><|1624|><|1641|><|code_end|>\nwhat<|t_0.47|><|code_start|><|1175|><|1710|><|640|><|231|><|1781|><|884|><|1649|><|930|><|1270|><|1824|><|1383|><|1748|><|1011|><|1176|><|1023|><|986|><|1419|><|1425|><|686|><|899|><|627|><|1419|><|1023|><|799|><|1338|><|1163|><|1464|><|627|><|840|><|361|><|693|><|159|><|1041|><|562|><|1444|><|code_end|>\nit<|t_0.12|><|code_start|><|1078|><|685|><|982|><|277|><|1494|><|793|><|229|><|853|><|308|><|code_end|>\nis<|t_0.23|><|code_start|><|1291|><|1308|><|902|><|531|><|1022|><|231|><|992|><|1671|><|967|><|992|><|1646|><|1654|><|1791|><|701|><|1624|><|1565|><|1532|><|code_end|>";
|
||||
break;
|
||||
case 7:
|
||||
speaker = "but<|t_0.31|><|code_start|><|174|><|544|><|68|><|391|><|131|><|187|><|559|><|534|><|223|><|1185|><|612|><|301|><|387|><|94|><|1224|><|1159|><|162|><|236|><|1133|><|774|><|888|><|144|><|1038|><|code_end|>\nthat<|t_0.20|><|code_start|><|223|><|77|><|1517|><|446|><|1207|><|140|><|873|><|147|><|1051|><|210|><|1216|><|147|><|1148|><|678|><|501|><|code_end|>\nis<|t_0.13|><|code_start|><|912|><|822|><|622|><|519|><|1017|><|546|><|1740|><|1823|><|1561|><|273|><|code_end|>\nwhat<|t_0.16|><|code_start|><|1571|><|1597|><|486|><|1417|><|130|><|747|><|1088|><|1045|><|580|><|239|><|431|><|40|><|code_end|>\nit<|t_0.12|><|code_start|><|1736|><|878|><|1159|><|1004|><|1168|><|594|><|544|><|77|><|1032|><|code_end|>\nis<|t_0.28|><|code_start|><|1088|><|873|><|1726|><|1099|><|1095|><|1412|><|1106|><|1317|><|1292|><|149|><|1429|><|967|><|873|><|1754|><|229|><|1046|><|1595|><|1003|><|1603|><|1529|><|101|><|code_end|>";
|
||||
break;
|
||||
}
|
||||
last_speaker_codes = common_tokenize(model_ttc, speaker, false, true);
|
||||
last_speaker_seed = speaker_seed;
|
||||
if(!inputs.quiet && ttsdebugmode==1)
|
||||
|
@ -818,19 +842,22 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
|
|||
const float * embd = llama_get_embeddings(cts_ctx);
|
||||
std::vector<float> audio = embd_to_audio(embd, n_codes, n_embd, 4);
|
||||
|
||||
const int n_sr = 24000; // sampling rate
|
||||
const int n_sr = 24000; // original sampling rate
|
||||
const int t_sr = 16000; //final target sampling rate
|
||||
|
||||
// zero out first 0.2 seconds or 0.05 depending on whether its seeded
|
||||
const int cutout = (speaker_seed>0?(24000/5):(24000/20));
|
||||
// zero out first 0.1 seconds or 0.05 depending on whether its seeded
|
||||
const int cutout = (speaker_seed>0?(n_sr/10):(n_sr/20));
|
||||
for (int i = 0; i < cutout; ++i) {
|
||||
audio[i] = 0.0f;
|
||||
}
|
||||
//add some silence at the end
|
||||
for (int i = 0; i < 24000/10; ++i) {
|
||||
for (int i = 0; i < n_sr/10; ++i) {
|
||||
audio.push_back(0.0f);
|
||||
}
|
||||
|
||||
last_generated_audio = save_wav16_base64(audio, n_sr);
|
||||
audio = resample_wav(audio,n_sr,t_sr); //resample to 16k
|
||||
|
||||
last_generated_audio = save_wav16_base64(audio, t_sr);
|
||||
ttstime = timer_check();
|
||||
|
||||
if(!inputs.quiet)
|
||||
|
|
|
@ -345,6 +345,29 @@ std::string get_timestamp_str()
|
|||
return timestamp;
|
||||
}
|
||||
|
||||
// Resamples a buffer of float samples from input_rate to output_rate using
// simple linear interpolation between neighbouring source samples.
//
//  input       - source samples
//  input_rate  - sampling rate of the source samples
//  output_rate - sampling rate wanted for the result
//
// Returns floor(input.size() * output_rate / input_rate) samples.
// An empty vector is returned when the input is empty or either rate is zero
// (a zero input rate would otherwise give an infinite ratio and an undefined
// float-to-integer conversion). Equal rates return an unmodified copy.
std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate) {

    const size_t input_size = input.size();

    // Nothing meaningful can be produced from no samples or a zero rate.
    if (input_size == 0 || input_rate == 0 || output_rate == 0) {
        return {};
    }

    // Same rate: interpolation would reproduce the input sample for sample.
    if (input_rate == output_rate) {
        return input;
    }

    const double ratio = static_cast<double>(output_rate) / input_rate;
    const size_t newLength = static_cast<size_t>(input_size * ratio);
    std::vector<float> output(newLength);

    const size_t lastIndex = input_size - 1;

    // Perform simple linear interpolation resampling
    for (size_t i = 0; i < newLength; ++i) {
        const double srcIndex = i / ratio;
        const size_t srcIndexInt = static_cast<size_t>(srcIndex);
        if (srcIndexInt >= lastIndex) {
            // No right-hand neighbour to blend with. Clamping also keeps the
            // read in bounds if rounding pushes srcIndex past the end.
            output[i] = input[lastIndex];
            continue;
        }
        const double frac = srcIndex - srcIndexInt;
        output[i] = static_cast<float>(input[srcIndexInt] * (1 - frac) + input[srcIndexInt + 1] * frac);
    }

    return output;
}
|
||||
|
||||
//a very rudimentary all in one sampling function which has no dependencies
|
||||
int32_t kcpp_quick_sample(float * logits, const int n_logits, int top_k, float temp, std::mt19937 & rng)
|
||||
{
|
||||
|
|
|
@ -61,6 +61,8 @@ std::string kcpp_base64_encode(const unsigned char* data, unsigned int data_leng
|
|||
std::string kcpp_base64_encode(const std::string &data);
|
||||
|
||||
std::string get_timestamp_str();
|
||||
std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate);
|
||||
|
||||
int32_t kcpp_quick_sample(float * logits, const int n_logits, int top_k, float temp, std::mt19937 & rng);
|
||||
|
||||
struct kcpp_embd_batch { //duplcated from llava_embd_batch
|
||||
|
|
|
@ -41,35 +41,6 @@ static bool is_wav_buffer(const std::string buf) {
|
|||
return true;
|
||||
}
|
||||
|
||||
static std::vector<float> resample_wav(const std::vector<float>& input, uint32_t input_rate, uint32_t output_rate) {
|
||||
|
||||
size_t input_size = input.size();
|
||||
|
||||
double ratio = static_cast<double>(output_rate) / input_rate;
|
||||
size_t newLength = static_cast<size_t>(input.size() * ratio);
|
||||
std::vector<float> output(newLength);
|
||||
|
||||
if(whisperdebugmode==1)
|
||||
{
|
||||
printf("\nResample wav from %" PRIu32 " to %" PRIu32 " (in size: %zu, out size: %zu)",
|
||||
input_rate, output_rate, input_size, static_cast<std::size_t>(output.size()));
|
||||
}
|
||||
|
||||
// Perform simple linear interpolation resampling
|
||||
for (size_t i = 0; i < newLength; ++i) {
|
||||
double srcIndex = i / ratio;
|
||||
size_t srcIndexInt = static_cast<size_t>(srcIndex);
|
||||
double frac = srcIndex - srcIndexInt;
|
||||
if (srcIndexInt + 1 < input_size) {
|
||||
output[i] = static_cast<float>(input[srcIndexInt] * (1 - frac) + input[srcIndexInt + 1] * frac);
|
||||
} else {
|
||||
output[i] = input[srcIndexInt];
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
static bool read_wav(const std::string & b64data, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo)
|
||||
{
|
||||
drwav wav;
|
||||
|
@ -119,6 +90,11 @@ static bool read_wav(const std::string & b64data, std::vector<float>& pcmf32, st
|
|||
}
|
||||
|
||||
if (wav.sampleRate != COMMON_SAMPLE_RATE) {
|
||||
if(whisperdebugmode==1)
|
||||
{
|
||||
printf("\nResample wav from %" PRIu32 " to %" PRIu32 " (in size: %zu)",
|
||||
wav.sampleRate, COMMON_SAMPLE_RATE, raw_pcm.size());
|
||||
}
|
||||
raw_pcm = resample_wav(raw_pcm, wav.sampleRate, COMMON_SAMPLE_RATE);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue