mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 17:44:38 +00:00
fixed qwen2 audio issues, works fine now (+3 squashed commit)
Squashed commit: [b3053a1ba] updated lite [5071630d6] fixed mtmd issues, audio works [06efa5af4] fix mtmd compile
This commit is contained in:
parent
5a3b2e3921
commit
dca49de059
6 changed files with 218 additions and 90 deletions
2
Makefile
2
Makefile
|
@ -714,7 +714,7 @@ ttsmain: tools/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-op
|
|||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
gguf-split: tools/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
mtmd-cli: tools/mtmd/mtmd-cli.cpp tools/mtmd/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||
mtmd-cli: tools/mtmd/mtmd-cli.cpp tools/mtmd/mtmd.cpp tools/mtmd/mtmd-helper.cpp tools/mtmd/clip.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o ggml-backend-reg_vulkan.o ggml-vulkan.o $(OBJS_FULL) $(OBJS) lib/vulkan-1.lib
|
||||
$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
|
|
@ -3089,7 +3089,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep
|
|||
} else {
|
||||
if(debugmode==1 && !is_quiet)
|
||||
{
|
||||
printf("\nAudio Clip Embed Chunk %i used Tokens: %d",i,chunk.clp_image_tokens);
|
||||
printf("\nAudio Clip %i Embed Chunk used Tokens: %d",i,chunk.clp_image_tokens);
|
||||
}
|
||||
total_chunk_tokens += chunk.clp_image_tokens;
|
||||
media_objects[i].mediachunks.push_back(chunk);
|
||||
|
@ -3480,7 +3480,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
|
||||
bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
|
||||
TokenizeString("\n\n", media_sep, file_format, false);
|
||||
TokenizeString("\nImages:\n", media_intro, file_format, false);
|
||||
TokenizeString("\nAttached Media:\n", media_intro, file_format, false);
|
||||
|
||||
if(media_composite_image_signature=="")
|
||||
{
|
||||
|
|
294
klite.embd
294
klite.embd
|
@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
|
|||
-->
|
||||
|
||||
<script id="init-config">
|
||||
const LITEVER = 259;
|
||||
const LITEVER = 260;
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
|
||||
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
|
||||
|
@ -1261,28 +1261,28 @@ Current version indicated by LITEVER below.
|
|||
.zoomedimg
|
||||
{
|
||||
border-radius: 6%;
|
||||
width:462px;
|
||||
height:462px;
|
||||
width:420px;
|
||||
height:420px;
|
||||
}
|
||||
.zoomedimg.portrait
|
||||
{
|
||||
width:308px;
|
||||
height:462px;
|
||||
width:280px;
|
||||
height:420px;
|
||||
}
|
||||
.zoomedimg.portrait_long
|
||||
{
|
||||
width:231px;
|
||||
height:462px;
|
||||
width:210px;
|
||||
height:420px;
|
||||
}
|
||||
.zoomedimg.landscape
|
||||
{
|
||||
width:462px;
|
||||
height:308px;
|
||||
width:420px;
|
||||
height:280px;
|
||||
}
|
||||
.zoomedimg.landscape_long
|
||||
{
|
||||
width:462px;
|
||||
height:231px;
|
||||
width:420px;
|
||||
height:210px;
|
||||
}
|
||||
@media (max-width: 620px) {
|
||||
.zoomedimg {
|
||||
|
@ -3670,17 +3670,20 @@ Current version indicated by LITEVER below.
|
|||
//truncate to first 3 bytes
|
||||
return hsh.substring(0, hashBytes*2);
|
||||
};
|
||||
function b64_to_persistent_blob(data)
|
||||
function b64_to_persistent_blob(data, refhash="") //refhash will be calculated if not provided
|
||||
{
|
||||
if(!data || !data.startsWith("data:"))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
let audiohash = cyrb_hash(data);
|
||||
let fetchedblob = data_hash_to_blob_lookup[audiohash];
|
||||
if(refhash=="")
|
||||
{
|
||||
refhash = cyrb_hash(data);
|
||||
}
|
||||
let fetchedblob = data_hash_to_blob_lookup[refhash];
|
||||
if(fetchedblob)
|
||||
{
|
||||
return fetchedblob;
|
||||
return fetchedblob.blob;
|
||||
}
|
||||
let splits = data.split(";base64,");
|
||||
let dtype = splits[0];
|
||||
|
@ -3695,7 +3698,7 @@ Current version indicated by LITEVER below.
|
|||
// Create Blob and URL
|
||||
const audioBlob = new Blob([bytes], {type: dtype});
|
||||
const audioUrl = URL.createObjectURL(audioBlob);
|
||||
data_hash_to_blob_lookup[audiohash] = audioUrl;
|
||||
data_hash_to_blob_lookup[refhash] = {"id":audioUrl,"original":data,"blob":audioUrl};
|
||||
return audioUrl;
|
||||
}
|
||||
function basic_lcg(seed) { // simple RNG for reproducible dice rolls
|
||||
|
@ -4002,10 +4005,11 @@ Current version indicated by LITEVER below.
|
|||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
const file = files[0];
|
||||
const fname = files[0].name;
|
||||
const reader = new FileReader();
|
||||
reader.onload = function(img) {
|
||||
let origImg = img.target.result;
|
||||
self_upload_file_dispatch(origImg);
|
||||
self_upload_file_dispatch(origImg,fname);
|
||||
}
|
||||
reader.readAsDataURL(file);
|
||||
}
|
||||
|
@ -14663,21 +14667,21 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("addmediacontainer").classList.add("hidden");
|
||||
}
|
||||
|
||||
function self_upload_file_dispatch(data)
|
||||
function self_upload_file_dispatch(data,filename)
|
||||
{
|
||||
if(data.startsWith("data:audio"))
|
||||
{
|
||||
self_upload_audio(data);
|
||||
self_upload_audio(data,filename);
|
||||
}
|
||||
else if(data.startsWith("data:image"))
|
||||
{
|
||||
self_upload_img(data);
|
||||
self_upload_img(data,filename);
|
||||
}else{
|
||||
msgbox("Unsupported File Format!\nOnly Image and Audio files are supported!","Unsupported File Format");
|
||||
}
|
||||
}
|
||||
|
||||
function self_upload_img(origImg)
|
||||
function self_upload_img(origImg,filename)
|
||||
{
|
||||
let imgid = "selfuploadimg"+(Math.floor(10000 + Math.random() * 90000)).toString();
|
||||
let nimgtag = "[<|p|" + imgid + "|p|>]";
|
||||
|
@ -14719,7 +14723,7 @@ Current version indicated by LITEVER below.
|
|||
}, false, imgres,0.35,true);
|
||||
}
|
||||
|
||||
function self_upload_audio(origAudio)
|
||||
function self_upload_audio(origAudio,filename)
|
||||
{
|
||||
let imgid = "selfuploadaudio"+(Math.floor(10000 + Math.random() * 90000)).toString();
|
||||
let nimgtag = "[<|p|" + imgid + "|p|>]";
|
||||
|
@ -14737,10 +14741,12 @@ Current version indicated by LITEVER below.
|
|||
image_db[imgid] = { done: false, queue: "Processing", result: "", prompt:"", poll_category:0 };
|
||||
image_db[imgid].aspect = 0;
|
||||
image_db[imgid].imsource = 1; //0=generated,1=uploaded
|
||||
image_db[imgid].imrefid = "";
|
||||
convertAudioToCompressedBase64(origAudio,(newAudio)=>{
|
||||
image_db[imgid].imrefid = filename;
|
||||
image_db[imgid].len = 0;
|
||||
convertAudioToCompressedBase64(origAudio,(newAudio,duration)=>{
|
||||
image_db[imgid].done = true;
|
||||
image_db[imgid].result = newAudio;
|
||||
image_db[imgid].len = duration;
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -14760,7 +14766,7 @@ Current version indicated by LITEVER below.
|
|||
var reader = new FileReader();
|
||||
reader.onload = function(event){
|
||||
let origImg = event.target.result;
|
||||
self_upload_file_dispatch(origImg);
|
||||
self_upload_file_dispatch(origImg,"");
|
||||
};
|
||||
reader.readAsDataURL(blob);
|
||||
founditem = true;
|
||||
|
@ -14789,10 +14795,11 @@ Current version indicated by LITEVER below.
|
|||
console.log(files);
|
||||
if (files.length > 0 && files[0] != null && files[0].name && files[0].name != "") {
|
||||
const file = files[0];
|
||||
const fname = files[0].name;
|
||||
const reader = new FileReader();
|
||||
reader.onload = function(img) {
|
||||
let origImg = img.target.result;
|
||||
self_upload_file_dispatch(origImg);
|
||||
self_upload_file_dispatch(origImg, fname);
|
||||
}
|
||||
reader.readAsDataURL(file);
|
||||
document.getElementById("pasteimgcontainer").classList.add("hidden");
|
||||
|
@ -14875,7 +14882,7 @@ Current version indicated by LITEVER below.
|
|||
const sy = (videoHeight - sideLength) / 2;
|
||||
context.drawImage(video, sx, sy, sideLength, sideLength, 0, 0, 512, 512);
|
||||
const dataURL = canvas.toDataURL('image/png');
|
||||
self_upload_file_dispatch(dataURL); // Call your upload function
|
||||
self_upload_file_dispatch(dataURL,""); // Call your upload function
|
||||
hide_popups();
|
||||
}
|
||||
|
||||
|
@ -14886,10 +14893,11 @@ Current version indicated by LITEVER below.
|
|||
finput.onchange = (event) => {
|
||||
if (event.target.files.length > 0 && event.target.files[0]) {
|
||||
const file = event.target.files[0];
|
||||
const fname = file.name;
|
||||
const reader = new FileReader();
|
||||
reader.onload = function(img) {
|
||||
let origImg = img.target.result;
|
||||
self_upload_file_dispatch(origImg);
|
||||
self_upload_file_dispatch(origImg,fname);
|
||||
}
|
||||
reader.readAsDataURL(file);
|
||||
}
|
||||
|
@ -16345,6 +16353,10 @@ Current version indicated by LITEVER below.
|
|||
{
|
||||
submit_payload.params.images = insertAIVisionImages;
|
||||
}
|
||||
if(is_using_kcpp_with_llava() && insertAIAudioSounds.length>0)
|
||||
{
|
||||
submit_payload.params.audio = insertAIAudioSounds;
|
||||
}
|
||||
|
||||
if(localsettings.sampler_seed>=1)
|
||||
{
|
||||
|
@ -17689,6 +17701,51 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
|
||||
}
|
||||
function zoomed_transcribe_btn(audiohash,onDone)
|
||||
{
|
||||
let fetchedblob = data_hash_to_blob_lookup[audiohash];
|
||||
if(!fetchedblob)
|
||||
{
|
||||
return;
|
||||
}
|
||||
fetch(fetchedblob.blob)
|
||||
.then(x => x.blob())
|
||||
.then(completeRecording => {
|
||||
audioBlobToDecodedAudioBuffer(completeRecording,(buffer)=>{
|
||||
resampleAudioBuffer(buffer,16000,(rsBuffer)=>{
|
||||
let wavblob = audioBufferToWavBlob(rsBuffer);
|
||||
const reader = new FileReader();
|
||||
reader.onload = function(audiodata) {
|
||||
let dataurl = audiodata.target.result;
|
||||
let payload = {
|
||||
"audio_data": dataurl,
|
||||
"prompt": "",
|
||||
"suppress_non_speech": (document.getElementById("voice_suppress_nonspeech").checked?true:false),
|
||||
"langcode": document.getElementById("voice_langcode").value
|
||||
};
|
||||
fetch(apply_proxy_url(custom_kobold_endpoint + koboldcpp_transcribe_endpoint), {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(payload),
|
||||
})
|
||||
.then(x => x.json())
|
||||
.then(resp => {
|
||||
console.log(resp);
|
||||
if(resp && resp.text && resp.text!="")
|
||||
{
|
||||
msgbox(resp.text,"Transcribed Audio");
|
||||
}
|
||||
}).catch((error) => {
|
||||
console.log("Transcribe Error: " + error);
|
||||
});
|
||||
}
|
||||
reader.readAsDataURL(wavblob);
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function toggle_ai_vision(imghash)
|
||||
{
|
||||
|
@ -17700,7 +17757,7 @@ Current version indicated by LITEVER below.
|
|||
{
|
||||
//request a new interrogation
|
||||
var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash);
|
||||
if(!alreadysent)
|
||||
if(!alreadysent && document.getElementById("zoomedimg"))
|
||||
{
|
||||
let b64 = document.getElementById("zoomedimg").src;
|
||||
interrogate_new_image(b64,imghash,(savedmeta.visionmode==1));
|
||||
|
@ -17719,10 +17776,33 @@ Current version indicated by LITEVER below.
|
|||
let savedmeta = completed_imgs_meta[imghash];
|
||||
if(!savedmeta && imghash!="")
|
||||
{
|
||||
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0};
|
||||
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0};
|
||||
}
|
||||
if(!savedmeta.visionmode)
|
||||
{
|
||||
savedmeta.visionmode = 0;
|
||||
}
|
||||
let hasllava = is_using_kcpp_with_llava();
|
||||
let visionstatus = "";
|
||||
if(savedmeta.visionmode==3)
|
||||
{
|
||||
if(custom_kobold_endpoint!="") //on a kobo endpoint
|
||||
{
|
||||
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(hasllava?`<span class="color_green">Active</span>`:`<span class="color_yellow">Unsupported</span>`));
|
||||
}
|
||||
else
|
||||
{
|
||||
let isoai = (custom_oai_key!="" && document.getElementById("useoaichatcompl").checked);
|
||||
let isgemini = (custom_gemini_key!="");
|
||||
visionstatus = (isoai?`<span class="color_green">OpenAI API (Conditional)</span>`:(isgemini?`<span class="color_green">Gemini API (Conditional)</span>`:`<span class="color_yellow">Unsupported</span>`));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(savedmeta.desc?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing</span>`));
|
||||
}
|
||||
|
||||
if(savedmeta)
|
||||
if(savedmeta && document.getElementById("zoomedimg"))
|
||||
{
|
||||
document.getElementById("zoomedimg").classList.remove("portrait");
|
||||
document.getElementById("zoomedimg").classList.remove("landscape");
|
||||
|
@ -17745,32 +17825,8 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("zoomedimg").classList.add("landscape_long");
|
||||
}
|
||||
|
||||
if(!savedmeta.visionmode)
|
||||
{
|
||||
savedmeta.visionmode = 0;
|
||||
}
|
||||
|
||||
let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description");
|
||||
latest_orig_prompt = origprompt;
|
||||
let hasllava = is_using_kcpp_with_llava();
|
||||
let visionstatus = "";
|
||||
if(savedmeta.visionmode==3)
|
||||
{
|
||||
if(custom_kobold_endpoint!="") //on a kobo endpoint
|
||||
{
|
||||
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(hasllava?`<span class="color_green">Active</span>`:`<span class="color_yellow">Unsupported</span>`));
|
||||
}
|
||||
else
|
||||
{
|
||||
let isoai = (custom_oai_key!="" && document.getElementById("useoaichatcompl").checked);
|
||||
let isgemini = (custom_gemini_key!="");
|
||||
visionstatus = (isoai?`<span class="color_green">OpenAI API (Conditional)</span>`:(isgemini?`<span class="color_green">Gemini API (Conditional)</span>`:`<span class="color_yellow">Unsupported</span>`));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(savedmeta.desc?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing</span>`));
|
||||
}
|
||||
|
||||
let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 140px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
|
||||
<option value="0">Disabled</option>
|
||||
|
@ -17786,6 +17842,25 @@ Current version indicated by LITEVER below.
|
|||
`;
|
||||
document.getElementById("aivisionmode").value = savedmeta.visionmode;
|
||||
}
|
||||
else if(savedmeta && document.getElementById("zoomedaudio"))
|
||||
{
|
||||
let transcribebtn = "";
|
||||
if(is_using_kcpp_with_whisper())
|
||||
{
|
||||
transcribebtn = `<button type="button" class="btn btn-primary" style="width: 140px; padding: 2px; margin: 3px; font-size:12px;" onclick="zoomed_transcribe_btn(\'`+imghash+`\')">Transcribe Audio</button>`;
|
||||
}
|
||||
let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 140px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
|
||||
<option value="0">Disabled</option>
|
||||
<option value="3">Multimodal Audio</option>
|
||||
</select>`;
|
||||
document.getElementById("zoomedimgdesc").innerHTML = `
|
||||
AI Embed Audio: `+visionstatus+` <span class="helpicon">?<span class="helptext">Allows the AI to hear and react to this audio (on supported models). Transcribe tries to replace the audio file with detected speech.</span></span>
|
||||
${togglebtn}
|
||||
<br>
|
||||
${transcribebtn}
|
||||
`;
|
||||
document.getElementById("aivisionmode").value = savedmeta.visionmode;
|
||||
}
|
||||
else
|
||||
{
|
||||
document.getElementById("zoomedimgdesc").innerText = "No Saved Data";
|
||||
|
@ -17801,7 +17876,7 @@ Current version indicated by LITEVER below.
|
|||
{
|
||||
inputBox("Enter prompt to create a new image, based on this source image.","Create Img2Img","","Enter Img2Img Prompt",()=>{
|
||||
let userinput = getInputBoxValue();
|
||||
if(userinput.trim()!="")
|
||||
if(userinput.trim()!="" && document.getElementById("zoomedimg"))
|
||||
{
|
||||
var sentence = userinput.trim().substring(0, 380);
|
||||
let b64 = document.getElementById("zoomedimg").src;
|
||||
|
@ -17812,26 +17887,60 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
function click_image(target,imghash)
|
||||
{
|
||||
clear_zoomed_img_and_audio();
|
||||
if(target)
|
||||
{
|
||||
if(localsettings.invert_colors)
|
||||
{
|
||||
document.getElementById("zoomedimg").classList.add("invert_colors");
|
||||
}else{
|
||||
document.getElementById("zoomedimg").classList.remove("invert_colors");
|
||||
}
|
||||
|
||||
document.getElementById("zoomedimgcontainer").classList.remove("hidden");
|
||||
document.getElementById("zoomedimg").src = target.src;
|
||||
|
||||
let src = `<img class="zoomedimg ${localsettings.invert_colors?"invert_colors":""}" id="zoomedimg" src="${target.src}">`;
|
||||
document.getElementById("zoomedimgdiv").innerHTML = src;
|
||||
document.getElementById("zoomedimgdiv").classList.remove("hidden");
|
||||
document.getElementById("zoomedaudiodiv").classList.add("hidden");
|
||||
update_clicked_image(imghash);
|
||||
|
||||
}
|
||||
}
|
||||
function delete_curr_image()
|
||||
function click_audio(target,audiohash,audioblob)
|
||||
{
|
||||
let removesrc = document.getElementById("zoomedimg").src;
|
||||
if (removesrc && removesrc != "") {
|
||||
var matchingStr = ("[<|d|" + removesrc + "|d|>]")
|
||||
clear_zoomed_img_and_audio();
|
||||
if(target)
|
||||
{
|
||||
document.getElementById("zoomedimgcontainer").classList.remove("hidden");
|
||||
document.getElementById("zoomedimgdiv").classList.add("hidden");
|
||||
document.getElementById("zoomedaudiodiv").classList.remove("hidden");
|
||||
let src = `<div><audio controls title="AudioPlayer"><source src="${audioblob}" id="zoomedaudio" type="audio/mp3"></audio></div>`;
|
||||
document.getElementById("zoomedaudiodiv").innerHTML = src;
|
||||
update_clicked_image(audiohash);
|
||||
}
|
||||
}
|
||||
function clear_zoomed_img_and_audio()
|
||||
{
|
||||
document.getElementById("zoomedimgdiv").innerHTML = "";
|
||||
document.getElementById("zoomedaudiodiv").innerHTML = "";
|
||||
}
|
||||
function delete_curr_media()
|
||||
{
|
||||
let zoomedimg = document.getElementById("zoomedimg");
|
||||
let zoomedaudio = document.getElementById("zoomedaudio");
|
||||
let targettoremove = "";
|
||||
if (zoomedimg && zoomedimg.src && zoomedimg.src !="") {
|
||||
targettoremove = zoomedimg.src;
|
||||
}
|
||||
else if(zoomedaudio && zoomedaudio.src && zoomedaudio.src !="")
|
||||
{
|
||||
let blobid = zoomedaudio.src;
|
||||
for(v in data_hash_to_blob_lookup)
|
||||
{
|
||||
let itm = data_hash_to_blob_lookup[v];
|
||||
if(itm.id==blobid)
|
||||
{
|
||||
targettoremove = itm.original;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(targettoremove)
|
||||
{
|
||||
var matchingStr = ("[<|d|" + targettoremove + "|d|>]")
|
||||
for (let i = 0; i < gametext_arr.length; ++i) {
|
||||
if (gametext_arr[i].includes(matchingStr)) {
|
||||
gametext_arr[i] = gametext_arr[i].replace(matchingStr, "");
|
||||
|
@ -17883,9 +17992,17 @@ Current version indicated by LITEVER below.
|
|||
|
||||
function render_audio_html(data)
|
||||
{
|
||||
let audioblob = b64_to_persistent_blob(data);
|
||||
const reinvertcolor = localsettings.invert_colors?"invert_colors":"";
|
||||
const str = `<div style="display:flex" class="${reinvertcolor}" contenteditable="false"><audio style="display:flex" controls title="Audio Player"><source src="${audioblob}" type="audio/mp3"></audio></div>`;
|
||||
let audiohash = cyrb_hash(data).trim();
|
||||
let audioblob = b64_to_persistent_blob(data,audiohash);
|
||||
let filename = "";
|
||||
let len = 0;
|
||||
if (completed_imgs_meta[audiohash] != null) {
|
||||
filename = completed_imgs_meta[audiohash].ref;
|
||||
len = completed_imgs_meta[audiohash].len;
|
||||
}
|
||||
let fndisp = filename!=""?`(${filename.substring(0,50)}) `:"";
|
||||
fndisp = len?(`: ${Math.floor(len)}s ${fndisp}`):fndisp;
|
||||
const str = `<span><br><button type="button" title="Attached Audio" class="btn btn-primary" style="font-size:12px; padding:8px 8px; border-radius: 16px" onclick="return click_audio(this,\'${audiohash}\',\'${audioblob}\');">Attached Audio ${fndisp}🔊</button><br></span>`;
|
||||
return str;
|
||||
}
|
||||
|
||||
|
@ -18472,7 +18589,7 @@ Current version indicated by LITEVER below.
|
|||
gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr);
|
||||
let metaid = cyrb_hash(img.result);
|
||||
//default to llava if supported, and image is self uploaded
|
||||
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:((image_db[key].imsource==1 && is_using_kcpp_with_llava())?3:0), aspect:image_db[key].aspect};
|
||||
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:((image_db[key].imsource==1 && is_using_kcpp_with_llava())?3:0), aspect:image_db[key].aspect, ref:image_db[key].imrefid, len:image_db[key].len};
|
||||
delete image_db[key];
|
||||
}
|
||||
}
|
||||
|
@ -18686,13 +18803,13 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
|
||||
// AUDIO MANIPULATION FUNCTIONS
|
||||
//convert any audio to a webm blob (high compression), returns a promise
|
||||
//convert any audio to a webm blob (high compression)
|
||||
function convertAudioToCompressedBase64(inputBase64, onDone) {
|
||||
// Step 1: Convert base64 string to Blob
|
||||
const matches = inputBase64.match(/^data:(audio\/[a-zA-Z0-9-]+);base64,(.+)$/);
|
||||
if (!matches) {
|
||||
console.log("Convert Audio: Invalid base64 input");
|
||||
onDone(null);
|
||||
onDone(null,null);
|
||||
}
|
||||
|
||||
const mimeType = matches[1];
|
||||
|
@ -18712,6 +18829,7 @@ Current version indicated by LITEVER below.
|
|||
audioContext.decodeAudioData(arrayBuffer, function (buffer) {
|
||||
const samplefreq = buffer.sampleRate;
|
||||
const samples = buffer.getChannelData(0); // mono
|
||||
const durationInSeconds = buffer.duration;
|
||||
const mp3encoder = new lamejs.Mp3Encoder(1, samplefreq, 40); // mono, 16kHz, 40kbps
|
||||
const sampleBlockSize = 1152; //can be anything but make it a multiple of 576 to make encoders life easier
|
||||
let mp3Data = [];
|
||||
|
@ -18737,12 +18855,12 @@ Current version indicated by LITEVER below.
|
|||
const fileReader = new FileReader();
|
||||
fileReader.onloadend = function () {
|
||||
const mp3Base64 = fileReader.result;
|
||||
onDone(mp3Base64);
|
||||
onDone(mp3Base64,durationInSeconds);
|
||||
};
|
||||
fileReader.readAsDataURL(mp3Blob);
|
||||
}, function (err) {
|
||||
console.log("Audio decode failed.");
|
||||
onDone(null);
|
||||
onDone(null,null);
|
||||
});
|
||||
};
|
||||
reader.readAsArrayBuffer(inputBlob);
|
||||
|
@ -19574,6 +19692,7 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
|
||||
var insertAIVisionImages = []; //concat gametext will populate this
|
||||
var insertAIAudioSounds = [];
|
||||
function concat_gametext(stripimg = false, stripimg_replace_str = "", append_before_segment="",append_after_segment="",escapeTxt=false,insertAIVision=false) {
|
||||
let fulltxt = "";
|
||||
for (let i = 0; i < gametext_arr.length; ++i) {
|
||||
|
@ -19637,6 +19756,7 @@ Current version indicated by LITEVER below.
|
|||
if(insertAIVision)
|
||||
{
|
||||
insertAIVisionImages = []; //a bit hacky
|
||||
insertAIAudioSounds = [];
|
||||
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
|
||||
// m here means the whole matched string
|
||||
let inner = m.substring(5, m.length - 5);
|
||||
|
@ -19649,11 +19769,18 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
else if(foundmeta.visionmode==3)
|
||||
{
|
||||
let placeholder = "";
|
||||
let parts = inner.split(',');
|
||||
if (parts.length === 2 && parts[0].startsWith('data:image')) {
|
||||
insertAIVisionImages.push(parts[1]);
|
||||
placeholder = "\n(Attached Image)\n";
|
||||
}
|
||||
return "\n(Attached Image)\n";
|
||||
else if(parts.length === 2 && parts[0].startsWith('data:audio'))
|
||||
{
|
||||
insertAIAudioSounds.push(parts[1]);
|
||||
placeholder = "\n(Attached Audio)\n";
|
||||
}
|
||||
return placeholder;
|
||||
}
|
||||
}
|
||||
return "";
|
||||
|
@ -25798,11 +25925,12 @@ Current version indicated by LITEVER below.
|
|||
<div class="popupbg flex"></div>
|
||||
<div class="nspopup flexsize highest">
|
||||
<div class="popuptitlebar">
|
||||
<div class="popuptitletext">Image Information</div>
|
||||
<div class="popuptitletext">Media Information</div>
|
||||
</div>
|
||||
|
||||
<div class="zoomedimgdiv">
|
||||
<img class="zoomedimg" id="zoomedimg" src="">
|
||||
<div id="zoomedimgdiv" class="zoomedimgdiv">
|
||||
</div>
|
||||
<div id="zoomedaudiodiv" class="zoomedimgdiv">
|
||||
</div>
|
||||
|
||||
<div class="menutext zoomedimgdesc" id="zoomedimgdesc" style="word-wrap: break-word;">
|
||||
|
@ -25810,8 +25938,8 @@ Current version indicated by LITEVER below.
|
|||
</div>
|
||||
<br>
|
||||
<div class="popupfooter">
|
||||
<button type="button" class="bg_red btn btn-primary" style="width: 124px;" onclick="delete_curr_image();hide_popups();">Delete Image</button>
|
||||
<button type="button" class="btn btn-primary" onclick="hide_popups()">Close</button>
|
||||
<button type="button" class="bg_red btn btn-primary" style="width: 124px;" onclick="delete_curr_media();clear_zoomed_img_and_audio();hide_popups();">Delete Media</button>
|
||||
<button type="button" class="btn btn-primary" onclick="clear_zoomed_img_and_audio();hide_popups()">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -454,7 +454,7 @@ bool audio_embd_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const whi
|
|||
batch_f32.entries.push_back(std::move(mel_f32));
|
||||
|
||||
int n_mmproj_embd = clip_n_mmproj_embd(ctx_clip);
|
||||
float * audio_embd = (float *)malloc(n_tokens * n_mmproj_embd);
|
||||
float * audio_embd = (float *)malloc(n_tokens * n_mmproj_embd * sizeof(float));
|
||||
bool ok = clip_image_batch_encode(
|
||||
ctx_clip,
|
||||
n_threads,
|
||||
|
@ -462,5 +462,5 @@ bool audio_embd_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const whi
|
|||
audio_embd);
|
||||
*image_embd_out = audio_embd;
|
||||
*n_img_pos_out = n_tokens;
|
||||
return ok ? 0 : 1;
|
||||
return ok;
|
||||
}
|
|
@ -29,7 +29,7 @@
|
|||
#define MA_API static
|
||||
#include "miniaudio/miniaudio.h"
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
// #define STB_IMAGE_IMPLEMENTATION
|
||||
#include "stb/stb_image.h"
|
||||
|
||||
#define LOG_INF(...) fprintf(stdout, __VA_ARGS__)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue