mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-13 02:19:41 +00:00
allow whisper interrogate mode for audio files
This commit is contained in:
parent
490b13af83
commit
db2e5e43d9
1 changed file with 48 additions and 9 deletions
57
klite.embd
57
klite.embd
|
@ -3459,6 +3459,8 @@ Current version indicated by LITEVER below.
|
||||||
};
|
};
|
||||||
|
|
||||||
const defaultsettings = JSON.parse(JSON.stringify(localsettings));
|
const defaultsettings = JSON.parse(JSON.stringify(localsettings));
|
||||||
|
//visionmode 0=disabled, 1=hordeinterrogate, 2=localinterrogate, 3=multimodal
|
||||||
|
//type 0=img, 1=audio
|
||||||
const default_imgs_meta = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0, data: ""};
|
const default_imgs_meta = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0, type:0, data: ""};
|
||||||
|
|
||||||
//a list of presets users can choose from
|
//a list of presets users can choose from
|
||||||
|
@ -17916,11 +17918,18 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
function zoomed_transcribe_btn(audiohash,onDone)
|
function zoomed_transcribe_btn(audiohash)
|
||||||
|
{
|
||||||
|
transcribe_audio_file(audiohash,(txt)=>{
|
||||||
|
msgbox(txt,"Transcribed Audio");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
function transcribe_audio_file(audiohash,onDone)
|
||||||
{
|
{
|
||||||
let fetchedblob = data_hash_to_blob_lookup[audiohash];
|
let fetchedblob = data_hash_to_blob_lookup[audiohash];
|
||||||
if(!fetchedblob)
|
if(!fetchedblob)
|
||||||
{
|
{
|
||||||
|
onDone(null);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fetch(fetchedblob.blob)
|
fetch(fetchedblob.blob)
|
||||||
|
@ -17950,10 +17959,14 @@ Current version indicated by LITEVER below.
|
||||||
console.log(resp);
|
console.log(resp);
|
||||||
if(resp && resp.text && resp.text!="")
|
if(resp && resp.text && resp.text!="")
|
||||||
{
|
{
|
||||||
msgbox(resp.text,"Transcribed Audio");
|
onDone(resp.text);
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
onDone(null);
|
||||||
}
|
}
|
||||||
}).catch((error) => {
|
}).catch((error) => {
|
||||||
console.log("Transcribe Error: " + error);
|
console.log("Transcribe Error: " + error);
|
||||||
|
onDone(null);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
reader.readAsDataURL(wavblob);
|
reader.readAsDataURL(wavblob);
|
||||||
|
@ -17968,14 +17981,34 @@ Current version indicated by LITEVER below.
|
||||||
if(savedmeta)
|
if(savedmeta)
|
||||||
{
|
{
|
||||||
savedmeta.visionmode = document.getElementById("aivisionmode").value;
|
savedmeta.visionmode = document.getElementById("aivisionmode").value;
|
||||||
if(!savedmeta.desc && (savedmeta.visionmode==1 || savedmeta.visionmode==2))
|
if(savedmeta.type==1) //audio
|
||||||
{
|
{
|
||||||
//request a new interrogation
|
if(!savedmeta.desc && savedmeta.visionmode==2)
|
||||||
var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash);
|
|
||||||
if(!alreadysent && document.getElementById("zoomedimg"))
|
|
||||||
{
|
{
|
||||||
let b64 = document.getElementById("zoomedimg").src;
|
var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash);
|
||||||
interrogate_new_image(b64,imghash,(savedmeta.visionmode==1));
|
if(!alreadysent && document.getElementById("zoomedaudio"))
|
||||||
|
{
|
||||||
|
transcribe_audio_file(imghash,(txt)=>{
|
||||||
|
if(txt)
|
||||||
|
{
|
||||||
|
savedmeta.desc = txt;
|
||||||
|
update_clicked_image(imghash);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else //images
|
||||||
|
{
|
||||||
|
if(!savedmeta.desc && (savedmeta.visionmode==1 || savedmeta.visionmode==2))
|
||||||
|
{
|
||||||
|
//request a new interrogation
|
||||||
|
var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash);
|
||||||
|
if(!alreadysent && document.getElementById("zoomedimg"))
|
||||||
|
{
|
||||||
|
let b64 = document.getElementById("zoomedimg").src;
|
||||||
|
interrogate_new_image(b64,imghash,(savedmeta.visionmode==1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
update_clicked_image(imghash);
|
update_clicked_image(imghash);
|
||||||
|
@ -18066,6 +18099,7 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 140px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
|
let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 140px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
|
||||||
<option value="0">Disabled</option>
|
<option value="0">Disabled</option>
|
||||||
|
<option value="2">Transcribe (Local)</option>
|
||||||
<option value="3">Multimodal Audio</option>
|
<option value="3">Multimodal Audio</option>
|
||||||
</select>`;
|
</select>`;
|
||||||
document.getElementById("zoomedimgdesc").innerHTML = `
|
document.getElementById("zoomedimgdesc").innerHTML = `
|
||||||
|
@ -20000,7 +20034,12 @@ Current version indicated by LITEVER below.
|
||||||
let data = foundmeta.data;
|
let data = foundmeta.data;
|
||||||
if(foundmeta.desc && (foundmeta.visionmode==1||foundmeta.visionmode==2))
|
if(foundmeta.desc && (foundmeta.visionmode==1||foundmeta.visionmode==2))
|
||||||
{
|
{
|
||||||
return "\n(Attached Image: " + foundmeta.desc + ")\n";
|
if(foundmeta.type==1)//audio
|
||||||
|
{
|
||||||
|
return "\n(Attached Audio: " + foundmeta.desc + ")\n";
|
||||||
|
}else{
|
||||||
|
return "\n(Attached Image: " + foundmeta.desc + ")\n";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if(foundmeta.visionmode==3)
|
else if(foundmeta.visionmode==3)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue