mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 17:44:38 +00:00
fixed qwen2 audio issues, works fine now (+3 squashed commit)
Squashed commit: [b3053a1ba] updated lite [5071630d6] fixed mtmd issues, audio works [06efa5af4] fix mtmd compile
This commit is contained in:
parent
5a3b2e3921
commit
dca49de059
6 changed files with 218 additions and 90 deletions
2
Makefile
2
Makefile
|
@ -714,7 +714,7 @@ ttsmain: tools/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-op
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
gguf-split: tools/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
gguf-split: tools/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
mtmd-cli: tools/mtmd/mtmd-cli.cpp tools/mtmd/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
mtmd-cli: tools/mtmd/mtmd-cli.cpp tools/mtmd/mtmd.cpp tools/mtmd/mtmd-helper.cpp tools/mtmd/clip.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o ggml-backend-reg_vulkan.o ggml-vulkan.o $(OBJS_FULL) $(OBJS) lib/vulkan-1.lib
|
mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o ggml-backend-reg_vulkan.o ggml-vulkan.o $(OBJS_FULL) $(OBJS) lib/vulkan-1.lib
|
||||||
$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
|
@ -3089,7 +3089,7 @@ static void PrepareMediaEmbds(const int nctx, const std::vector<int> & media_sep
|
||||||
} else {
|
} else {
|
||||||
if(debugmode==1 && !is_quiet)
|
if(debugmode==1 && !is_quiet)
|
||||||
{
|
{
|
||||||
printf("\nAudio Clip Embed Chunk %i used Tokens: %d",i,chunk.clp_image_tokens);
|
printf("\nAudio Clip %i Embed Chunk used Tokens: %d",i,chunk.clp_image_tokens);
|
||||||
}
|
}
|
||||||
total_chunk_tokens += chunk.clp_image_tokens;
|
total_chunk_tokens += chunk.clp_image_tokens;
|
||||||
media_objects[i].mediachunks.push_back(chunk);
|
media_objects[i].mediachunks.push_back(chunk);
|
||||||
|
@ -3480,7 +3480,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
|
TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
|
||||||
bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
|
bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
|
||||||
TokenizeString("\n\n", media_sep, file_format, false);
|
TokenizeString("\n\n", media_sep, file_format, false);
|
||||||
TokenizeString("\nImages:\n", media_intro, file_format, false);
|
TokenizeString("\nAttached Media:\n", media_intro, file_format, false);
|
||||||
|
|
||||||
if(media_composite_image_signature=="")
|
if(media_composite_image_signature=="")
|
||||||
{
|
{
|
||||||
|
|
294
klite.embd
294
klite.embd
|
@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<script id="init-config">
|
<script id="init-config">
|
||||||
const LITEVER = 259;
|
const LITEVER = 260;
|
||||||
const urlParams = new URLSearchParams(window.location.search);
|
const urlParams = new URLSearchParams(window.location.search);
|
||||||
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
|
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
|
||||||
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
|
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
|
||||||
|
@ -1261,28 +1261,28 @@ Current version indicated by LITEVER below.
|
||||||
.zoomedimg
|
.zoomedimg
|
||||||
{
|
{
|
||||||
border-radius: 6%;
|
border-radius: 6%;
|
||||||
width:462px;
|
width:420px;
|
||||||
height:462px;
|
height:420px;
|
||||||
}
|
}
|
||||||
.zoomedimg.portrait
|
.zoomedimg.portrait
|
||||||
{
|
{
|
||||||
width:308px;
|
width:280px;
|
||||||
height:462px;
|
height:420px;
|
||||||
}
|
}
|
||||||
.zoomedimg.portrait_long
|
.zoomedimg.portrait_long
|
||||||
{
|
{
|
||||||
width:231px;
|
width:210px;
|
||||||
height:462px;
|
height:420px;
|
||||||
}
|
}
|
||||||
.zoomedimg.landscape
|
.zoomedimg.landscape
|
||||||
{
|
{
|
||||||
width:462px;
|
width:420px;
|
||||||
height:308px;
|
height:280px;
|
||||||
}
|
}
|
||||||
.zoomedimg.landscape_long
|
.zoomedimg.landscape_long
|
||||||
{
|
{
|
||||||
width:462px;
|
width:420px;
|
||||||
height:231px;
|
height:210px;
|
||||||
}
|
}
|
||||||
@media (max-width: 620px) {
|
@media (max-width: 620px) {
|
||||||
.zoomedimg {
|
.zoomedimg {
|
||||||
|
@ -3670,17 +3670,20 @@ Current version indicated by LITEVER below.
|
||||||
//truncate to first 3 bytes
|
//truncate to first 3 bytes
|
||||||
return hsh.substring(0, hashBytes*2);
|
return hsh.substring(0, hashBytes*2);
|
||||||
};
|
};
|
||||||
function b64_to_persistent_blob(data)
|
function b64_to_persistent_blob(data, refhash="") //refhash will be calculated if not provided
|
||||||
{
|
{
|
||||||
if(!data || !data.startsWith("data:"))
|
if(!data || !data.startsWith("data:"))
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
let audiohash = cyrb_hash(data);
|
if(refhash=="")
|
||||||
let fetchedblob = data_hash_to_blob_lookup[audiohash];
|
{
|
||||||
|
refhash = cyrb_hash(data);
|
||||||
|
}
|
||||||
|
let fetchedblob = data_hash_to_blob_lookup[refhash];
|
||||||
if(fetchedblob)
|
if(fetchedblob)
|
||||||
{
|
{
|
||||||
return fetchedblob;
|
return fetchedblob.blob;
|
||||||
}
|
}
|
||||||
let splits = data.split(";base64,");
|
let splits = data.split(";base64,");
|
||||||
let dtype = splits[0];
|
let dtype = splits[0];
|
||||||
|
@ -3695,7 +3698,7 @@ Current version indicated by LITEVER below.
|
||||||
// Create Blob and URL
|
// Create Blob and URL
|
||||||
const audioBlob = new Blob([bytes], {type: dtype});
|
const audioBlob = new Blob([bytes], {type: dtype});
|
||||||
const audioUrl = URL.createObjectURL(audioBlob);
|
const audioUrl = URL.createObjectURL(audioBlob);
|
||||||
data_hash_to_blob_lookup[audiohash] = audioUrl;
|
data_hash_to_blob_lookup[refhash] = {"id":audioUrl,"original":data,"blob":audioUrl};
|
||||||
return audioUrl;
|
return audioUrl;
|
||||||
}
|
}
|
||||||
function basic_lcg(seed) { // simple RNG for reproducible dice rolls
|
function basic_lcg(seed) { // simple RNG for reproducible dice rolls
|
||||||
|
@ -4002,10 +4005,11 @@ Current version indicated by LITEVER below.
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
e.stopPropagation();
|
e.stopPropagation();
|
||||||
const file = files[0];
|
const file = files[0];
|
||||||
|
const fname = files[0].name;
|
||||||
const reader = new FileReader();
|
const reader = new FileReader();
|
||||||
reader.onload = function(img) {
|
reader.onload = function(img) {
|
||||||
let origImg = img.target.result;
|
let origImg = img.target.result;
|
||||||
self_upload_file_dispatch(origImg);
|
self_upload_file_dispatch(origImg,fname);
|
||||||
}
|
}
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
}
|
}
|
||||||
|
@ -14663,21 +14667,21 @@ Current version indicated by LITEVER below.
|
||||||
document.getElementById("addmediacontainer").classList.add("hidden");
|
document.getElementById("addmediacontainer").classList.add("hidden");
|
||||||
}
|
}
|
||||||
|
|
||||||
function self_upload_file_dispatch(data)
|
function self_upload_file_dispatch(data,filename)
|
||||||
{
|
{
|
||||||
if(data.startsWith("data:audio"))
|
if(data.startsWith("data:audio"))
|
||||||
{
|
{
|
||||||
self_upload_audio(data);
|
self_upload_audio(data,filename);
|
||||||
}
|
}
|
||||||
else if(data.startsWith("data:image"))
|
else if(data.startsWith("data:image"))
|
||||||
{
|
{
|
||||||
self_upload_img(data);
|
self_upload_img(data,filename);
|
||||||
}else{
|
}else{
|
||||||
msgbox("Unsupported File Format!\nOnly Image and Audio files are supported!","Unsupported File Format");
|
msgbox("Unsupported File Format!\nOnly Image and Audio files are supported!","Unsupported File Format");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function self_upload_img(origImg)
|
function self_upload_img(origImg,filename)
|
||||||
{
|
{
|
||||||
let imgid = "selfuploadimg"+(Math.floor(10000 + Math.random() * 90000)).toString();
|
let imgid = "selfuploadimg"+(Math.floor(10000 + Math.random() * 90000)).toString();
|
||||||
let nimgtag = "[<|p|" + imgid + "|p|>]";
|
let nimgtag = "[<|p|" + imgid + "|p|>]";
|
||||||
|
@ -14719,7 +14723,7 @@ Current version indicated by LITEVER below.
|
||||||
}, false, imgres,0.35,true);
|
}, false, imgres,0.35,true);
|
||||||
}
|
}
|
||||||
|
|
||||||
function self_upload_audio(origAudio)
|
function self_upload_audio(origAudio,filename)
|
||||||
{
|
{
|
||||||
let imgid = "selfuploadaudio"+(Math.floor(10000 + Math.random() * 90000)).toString();
|
let imgid = "selfuploadaudio"+(Math.floor(10000 + Math.random() * 90000)).toString();
|
||||||
let nimgtag = "[<|p|" + imgid + "|p|>]";
|
let nimgtag = "[<|p|" + imgid + "|p|>]";
|
||||||
|
@ -14737,10 +14741,12 @@ Current version indicated by LITEVER below.
|
||||||
image_db[imgid] = { done: false, queue: "Processing", result: "", prompt:"", poll_category:0 };
|
image_db[imgid] = { done: false, queue: "Processing", result: "", prompt:"", poll_category:0 };
|
||||||
image_db[imgid].aspect = 0;
|
image_db[imgid].aspect = 0;
|
||||||
image_db[imgid].imsource = 1; //0=generated,1=uploaded
|
image_db[imgid].imsource = 1; //0=generated,1=uploaded
|
||||||
image_db[imgid].imrefid = "";
|
image_db[imgid].imrefid = filename;
|
||||||
convertAudioToCompressedBase64(origAudio,(newAudio)=>{
|
image_db[imgid].len = 0;
|
||||||
|
convertAudioToCompressedBase64(origAudio,(newAudio,duration)=>{
|
||||||
image_db[imgid].done = true;
|
image_db[imgid].done = true;
|
||||||
image_db[imgid].result = newAudio;
|
image_db[imgid].result = newAudio;
|
||||||
|
image_db[imgid].len = duration;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14760,7 +14766,7 @@ Current version indicated by LITEVER below.
|
||||||
var reader = new FileReader();
|
var reader = new FileReader();
|
||||||
reader.onload = function(event){
|
reader.onload = function(event){
|
||||||
let origImg = event.target.result;
|
let origImg = event.target.result;
|
||||||
self_upload_file_dispatch(origImg);
|
self_upload_file_dispatch(origImg,"");
|
||||||
};
|
};
|
||||||
reader.readAsDataURL(blob);
|
reader.readAsDataURL(blob);
|
||||||
founditem = true;
|
founditem = true;
|
||||||
|
@ -14789,10 +14795,11 @@ Current version indicated by LITEVER below.
|
||||||
console.log(files);
|
console.log(files);
|
||||||
if (files.length > 0 && files[0] != null && files[0].name && files[0].name != "") {
|
if (files.length > 0 && files[0] != null && files[0].name && files[0].name != "") {
|
||||||
const file = files[0];
|
const file = files[0];
|
||||||
|
const fname = files[0].name;
|
||||||
const reader = new FileReader();
|
const reader = new FileReader();
|
||||||
reader.onload = function(img) {
|
reader.onload = function(img) {
|
||||||
let origImg = img.target.result;
|
let origImg = img.target.result;
|
||||||
self_upload_file_dispatch(origImg);
|
self_upload_file_dispatch(origImg, fname);
|
||||||
}
|
}
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
document.getElementById("pasteimgcontainer").classList.add("hidden");
|
document.getElementById("pasteimgcontainer").classList.add("hidden");
|
||||||
|
@ -14875,7 +14882,7 @@ Current version indicated by LITEVER below.
|
||||||
const sy = (videoHeight - sideLength) / 2;
|
const sy = (videoHeight - sideLength) / 2;
|
||||||
context.drawImage(video, sx, sy, sideLength, sideLength, 0, 0, 512, 512);
|
context.drawImage(video, sx, sy, sideLength, sideLength, 0, 0, 512, 512);
|
||||||
const dataURL = canvas.toDataURL('image/png');
|
const dataURL = canvas.toDataURL('image/png');
|
||||||
self_upload_file_dispatch(dataURL); // Call your upload function
|
self_upload_file_dispatch(dataURL,""); // Call your upload function
|
||||||
hide_popups();
|
hide_popups();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14886,10 +14893,11 @@ Current version indicated by LITEVER below.
|
||||||
finput.onchange = (event) => {
|
finput.onchange = (event) => {
|
||||||
if (event.target.files.length > 0 && event.target.files[0]) {
|
if (event.target.files.length > 0 && event.target.files[0]) {
|
||||||
const file = event.target.files[0];
|
const file = event.target.files[0];
|
||||||
|
const fname = file.name;
|
||||||
const reader = new FileReader();
|
const reader = new FileReader();
|
||||||
reader.onload = function(img) {
|
reader.onload = function(img) {
|
||||||
let origImg = img.target.result;
|
let origImg = img.target.result;
|
||||||
self_upload_file_dispatch(origImg);
|
self_upload_file_dispatch(origImg,fname);
|
||||||
}
|
}
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
}
|
}
|
||||||
|
@ -16345,6 +16353,10 @@ Current version indicated by LITEVER below.
|
||||||
{
|
{
|
||||||
submit_payload.params.images = insertAIVisionImages;
|
submit_payload.params.images = insertAIVisionImages;
|
||||||
}
|
}
|
||||||
|
if(is_using_kcpp_with_llava() && insertAIAudioSounds.length>0)
|
||||||
|
{
|
||||||
|
submit_payload.params.audio = insertAIAudioSounds;
|
||||||
|
}
|
||||||
|
|
||||||
if(localsettings.sampler_seed>=1)
|
if(localsettings.sampler_seed>=1)
|
||||||
{
|
{
|
||||||
|
@ -17689,6 +17701,51 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
function zoomed_transcribe_btn(audiohash,onDone)
|
||||||
|
{
|
||||||
|
let fetchedblob = data_hash_to_blob_lookup[audiohash];
|
||||||
|
if(!fetchedblob)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fetch(fetchedblob.blob)
|
||||||
|
.then(x => x.blob())
|
||||||
|
.then(completeRecording => {
|
||||||
|
audioBlobToDecodedAudioBuffer(completeRecording,(buffer)=>{
|
||||||
|
resampleAudioBuffer(buffer,16000,(rsBuffer)=>{
|
||||||
|
let wavblob = audioBufferToWavBlob(rsBuffer);
|
||||||
|
const reader = new FileReader();
|
||||||
|
reader.onload = function(audiodata) {
|
||||||
|
let dataurl = audiodata.target.result;
|
||||||
|
let payload = {
|
||||||
|
"audio_data": dataurl,
|
||||||
|
"prompt": "",
|
||||||
|
"suppress_non_speech": (document.getElementById("voice_suppress_nonspeech").checked?true:false),
|
||||||
|
"langcode": document.getElementById("voice_langcode").value
|
||||||
|
};
|
||||||
|
fetch(apply_proxy_url(custom_kobold_endpoint + koboldcpp_transcribe_endpoint), {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
body: JSON.stringify(payload),
|
||||||
|
})
|
||||||
|
.then(x => x.json())
|
||||||
|
.then(resp => {
|
||||||
|
console.log(resp);
|
||||||
|
if(resp && resp.text && resp.text!="")
|
||||||
|
{
|
||||||
|
msgbox(resp.text,"Transcribed Audio");
|
||||||
|
}
|
||||||
|
}).catch((error) => {
|
||||||
|
console.log("Transcribe Error: " + error);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
reader.readAsDataURL(wavblob);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
function toggle_ai_vision(imghash)
|
function toggle_ai_vision(imghash)
|
||||||
{
|
{
|
||||||
|
@ -17700,7 +17757,7 @@ Current version indicated by LITEVER below.
|
||||||
{
|
{
|
||||||
//request a new interrogation
|
//request a new interrogation
|
||||||
var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash);
|
var alreadysent = Object.values(interrogation_db).some(item => item.imghash === imghash);
|
||||||
if(!alreadysent)
|
if(!alreadysent && document.getElementById("zoomedimg"))
|
||||||
{
|
{
|
||||||
let b64 = document.getElementById("zoomedimg").src;
|
let b64 = document.getElementById("zoomedimg").src;
|
||||||
interrogate_new_image(b64,imghash,(savedmeta.visionmode==1));
|
interrogate_new_image(b64,imghash,(savedmeta.visionmode==1));
|
||||||
|
@ -17719,10 +17776,33 @@ Current version indicated by LITEVER below.
|
||||||
let savedmeta = completed_imgs_meta[imghash];
|
let savedmeta = completed_imgs_meta[imghash];
|
||||||
if(!savedmeta && imghash!="")
|
if(!savedmeta && imghash!="")
|
||||||
{
|
{
|
||||||
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0};
|
savedmeta = completed_imgs_meta[imghash] = {prompt:"", desc:"", visionmode:0, aspect:0, ref:"", len:0};
|
||||||
|
}
|
||||||
|
if(!savedmeta.visionmode)
|
||||||
|
{
|
||||||
|
savedmeta.visionmode = 0;
|
||||||
|
}
|
||||||
|
let hasllava = is_using_kcpp_with_llava();
|
||||||
|
let visionstatus = "";
|
||||||
|
if(savedmeta.visionmode==3)
|
||||||
|
{
|
||||||
|
if(custom_kobold_endpoint!="") //on a kobo endpoint
|
||||||
|
{
|
||||||
|
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(hasllava?`<span class="color_green">Active</span>`:`<span class="color_yellow">Unsupported</span>`));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
let isoai = (custom_oai_key!="" && document.getElementById("useoaichatcompl").checked);
|
||||||
|
let isgemini = (custom_gemini_key!="");
|
||||||
|
visionstatus = (isoai?`<span class="color_green">OpenAI API (Conditional)</span>`:(isgemini?`<span class="color_green">Gemini API (Conditional)</span>`:`<span class="color_yellow">Unsupported</span>`));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(savedmeta.desc?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing</span>`));
|
||||||
}
|
}
|
||||||
|
|
||||||
if(savedmeta)
|
if(savedmeta && document.getElementById("zoomedimg"))
|
||||||
{
|
{
|
||||||
document.getElementById("zoomedimg").classList.remove("portrait");
|
document.getElementById("zoomedimg").classList.remove("portrait");
|
||||||
document.getElementById("zoomedimg").classList.remove("landscape");
|
document.getElementById("zoomedimg").classList.remove("landscape");
|
||||||
|
@ -17745,32 +17825,8 @@ Current version indicated by LITEVER below.
|
||||||
document.getElementById("zoomedimg").classList.add("landscape_long");
|
document.getElementById("zoomedimg").classList.add("landscape_long");
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!savedmeta.visionmode)
|
|
||||||
{
|
|
||||||
savedmeta.visionmode = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description");
|
let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description");
|
||||||
latest_orig_prompt = origprompt;
|
latest_orig_prompt = origprompt;
|
||||||
let hasllava = is_using_kcpp_with_llava();
|
|
||||||
let visionstatus = "";
|
|
||||||
if(savedmeta.visionmode==3)
|
|
||||||
{
|
|
||||||
if(custom_kobold_endpoint!="") //on a kobo endpoint
|
|
||||||
{
|
|
||||||
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(hasllava?`<span class="color_green">Active</span>`:`<span class="color_yellow">Unsupported</span>`));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
let isoai = (custom_oai_key!="" && document.getElementById("useoaichatcompl").checked);
|
|
||||||
let isgemini = (custom_gemini_key!="");
|
|
||||||
visionstatus = (isoai?`<span class="color_green">OpenAI API (Conditional)</span>`:(isgemini?`<span class="color_green">Gemini API (Conditional)</span>`:`<span class="color_yellow">Unsupported</span>`));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(savedmeta.desc?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing</span>`));
|
|
||||||
}
|
|
||||||
|
|
||||||
let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 140px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
|
let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 140px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
|
||||||
<option value="0">Disabled</option>
|
<option value="0">Disabled</option>
|
||||||
|
@ -17786,6 +17842,25 @@ Current version indicated by LITEVER below.
|
||||||
`;
|
`;
|
||||||
document.getElementById("aivisionmode").value = savedmeta.visionmode;
|
document.getElementById("aivisionmode").value = savedmeta.visionmode;
|
||||||
}
|
}
|
||||||
|
else if(savedmeta && document.getElementById("zoomedaudio"))
|
||||||
|
{
|
||||||
|
let transcribebtn = "";
|
||||||
|
if(is_using_kcpp_with_whisper())
|
||||||
|
{
|
||||||
|
transcribebtn = `<button type="button" class="btn btn-primary" style="width: 140px; padding: 2px; margin: 3px; font-size:12px;" onclick="zoomed_transcribe_btn(\'`+imghash+`\')">Transcribe Audio</button>`;
|
||||||
|
}
|
||||||
|
let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 140px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
|
||||||
|
<option value="0">Disabled</option>
|
||||||
|
<option value="3">Multimodal Audio</option>
|
||||||
|
</select>`;
|
||||||
|
document.getElementById("zoomedimgdesc").innerHTML = `
|
||||||
|
AI Embed Audio: `+visionstatus+` <span class="helpicon">?<span class="helptext">Allows the AI to hear and react to this audio (on supported models). Transcribe tries to replace the audio file with detected speech.</span></span>
|
||||||
|
${togglebtn}
|
||||||
|
<br>
|
||||||
|
${transcribebtn}
|
||||||
|
`;
|
||||||
|
document.getElementById("aivisionmode").value = savedmeta.visionmode;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
document.getElementById("zoomedimgdesc").innerText = "No Saved Data";
|
document.getElementById("zoomedimgdesc").innerText = "No Saved Data";
|
||||||
|
@ -17801,7 +17876,7 @@ Current version indicated by LITEVER below.
|
||||||
{
|
{
|
||||||
inputBox("Enter prompt to create a new image, based on this source image.","Create Img2Img","","Enter Img2Img Prompt",()=>{
|
inputBox("Enter prompt to create a new image, based on this source image.","Create Img2Img","","Enter Img2Img Prompt",()=>{
|
||||||
let userinput = getInputBoxValue();
|
let userinput = getInputBoxValue();
|
||||||
if(userinput.trim()!="")
|
if(userinput.trim()!="" && document.getElementById("zoomedimg"))
|
||||||
{
|
{
|
||||||
var sentence = userinput.trim().substring(0, 380);
|
var sentence = userinput.trim().substring(0, 380);
|
||||||
let b64 = document.getElementById("zoomedimg").src;
|
let b64 = document.getElementById("zoomedimg").src;
|
||||||
|
@ -17812,26 +17887,60 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
function click_image(target,imghash)
|
function click_image(target,imghash)
|
||||||
{
|
{
|
||||||
|
clear_zoomed_img_and_audio();
|
||||||
if(target)
|
if(target)
|
||||||
{
|
{
|
||||||
if(localsettings.invert_colors)
|
|
||||||
{
|
|
||||||
document.getElementById("zoomedimg").classList.add("invert_colors");
|
|
||||||
}else{
|
|
||||||
document.getElementById("zoomedimg").classList.remove("invert_colors");
|
|
||||||
}
|
|
||||||
document.getElementById("zoomedimgcontainer").classList.remove("hidden");
|
document.getElementById("zoomedimgcontainer").classList.remove("hidden");
|
||||||
document.getElementById("zoomedimg").src = target.src;
|
let src = `<img class="zoomedimg ${localsettings.invert_colors?"invert_colors":""}" id="zoomedimg" src="${target.src}">`;
|
||||||
|
document.getElementById("zoomedimgdiv").innerHTML = src;
|
||||||
|
document.getElementById("zoomedimgdiv").classList.remove("hidden");
|
||||||
|
document.getElementById("zoomedaudiodiv").classList.add("hidden");
|
||||||
update_clicked_image(imghash);
|
update_clicked_image(imghash);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
function delete_curr_image()
|
function click_audio(target,audiohash,audioblob)
|
||||||
{
|
{
|
||||||
let removesrc = document.getElementById("zoomedimg").src;
|
clear_zoomed_img_and_audio();
|
||||||
if (removesrc && removesrc != "") {
|
if(target)
|
||||||
var matchingStr = ("[<|d|" + removesrc + "|d|>]")
|
{
|
||||||
|
document.getElementById("zoomedimgcontainer").classList.remove("hidden");
|
||||||
|
document.getElementById("zoomedimgdiv").classList.add("hidden");
|
||||||
|
document.getElementById("zoomedaudiodiv").classList.remove("hidden");
|
||||||
|
let src = `<div><audio controls title="AudioPlayer"><source src="${audioblob}" id="zoomedaudio" type="audio/mp3"></audio></div>`;
|
||||||
|
document.getElementById("zoomedaudiodiv").innerHTML = src;
|
||||||
|
update_clicked_image(audiohash);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
function clear_zoomed_img_and_audio()
|
||||||
|
{
|
||||||
|
document.getElementById("zoomedimgdiv").innerHTML = "";
|
||||||
|
document.getElementById("zoomedaudiodiv").innerHTML = "";
|
||||||
|
}
|
||||||
|
function delete_curr_media()
|
||||||
|
{
|
||||||
|
let zoomedimg = document.getElementById("zoomedimg");
|
||||||
|
let zoomedaudio = document.getElementById("zoomedaudio");
|
||||||
|
let targettoremove = "";
|
||||||
|
if (zoomedimg && zoomedimg.src && zoomedimg.src !="") {
|
||||||
|
targettoremove = zoomedimg.src;
|
||||||
|
}
|
||||||
|
else if(zoomedaudio && zoomedaudio.src && zoomedaudio.src !="")
|
||||||
|
{
|
||||||
|
let blobid = zoomedaudio.src;
|
||||||
|
for(v in data_hash_to_blob_lookup)
|
||||||
|
{
|
||||||
|
let itm = data_hash_to_blob_lookup[v];
|
||||||
|
if(itm.id==blobid)
|
||||||
|
{
|
||||||
|
targettoremove = itm.original;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(targettoremove)
|
||||||
|
{
|
||||||
|
var matchingStr = ("[<|d|" + targettoremove + "|d|>]")
|
||||||
for (let i = 0; i < gametext_arr.length; ++i) {
|
for (let i = 0; i < gametext_arr.length; ++i) {
|
||||||
if (gametext_arr[i].includes(matchingStr)) {
|
if (gametext_arr[i].includes(matchingStr)) {
|
||||||
gametext_arr[i] = gametext_arr[i].replace(matchingStr, "");
|
gametext_arr[i] = gametext_arr[i].replace(matchingStr, "");
|
||||||
|
@ -17883,9 +17992,17 @@ Current version indicated by LITEVER below.
|
||||||
|
|
||||||
function render_audio_html(data)
|
function render_audio_html(data)
|
||||||
{
|
{
|
||||||
let audioblob = b64_to_persistent_blob(data);
|
let audiohash = cyrb_hash(data).trim();
|
||||||
const reinvertcolor = localsettings.invert_colors?"invert_colors":"";
|
let audioblob = b64_to_persistent_blob(data,audiohash);
|
||||||
const str = `<div style="display:flex" class="${reinvertcolor}" contenteditable="false"><audio style="display:flex" controls title="Audio Player"><source src="${audioblob}" type="audio/mp3"></audio></div>`;
|
let filename = "";
|
||||||
|
let len = 0;
|
||||||
|
if (completed_imgs_meta[audiohash] != null) {
|
||||||
|
filename = completed_imgs_meta[audiohash].ref;
|
||||||
|
len = completed_imgs_meta[audiohash].len;
|
||||||
|
}
|
||||||
|
let fndisp = filename!=""?`(${filename.substring(0,50)}) `:"";
|
||||||
|
fndisp = len?(`: ${Math.floor(len)}s ${fndisp}`):fndisp;
|
||||||
|
const str = `<span><br><button type="button" title="Attached Audio" class="btn btn-primary" style="font-size:12px; padding:8px 8px; border-radius: 16px" onclick="return click_audio(this,\'${audiohash}\',\'${audioblob}\');">Attached Audio ${fndisp}🔊</button><br></span>`;
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -18472,7 +18589,7 @@ Current version indicated by LITEVER below.
|
||||||
gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr);
|
gametext_arr[i] = gametext_arr[i].replace(matchstr, newstr);
|
||||||
let metaid = cyrb_hash(img.result);
|
let metaid = cyrb_hash(img.result);
|
||||||
//default to llava if supported, and image is self uploaded
|
//default to llava if supported, and image is self uploaded
|
||||||
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:((image_db[key].imsource==1 && is_using_kcpp_with_llava())?3:0), aspect:image_db[key].aspect};
|
completed_imgs_meta[metaid] = {prompt:image_db[key].prompt, desc:"", visionmode:((image_db[key].imsource==1 && is_using_kcpp_with_llava())?3:0), aspect:image_db[key].aspect, ref:image_db[key].imrefid, len:image_db[key].len};
|
||||||
delete image_db[key];
|
delete image_db[key];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18686,13 +18803,13 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
|
|
||||||
// AUDIO MANIPULATION FUNCTIONS
|
// AUDIO MANIPULATION FUNCTIONS
|
||||||
//convert any audio to a webm blob (high compression), returns a promise
|
//convert any audio to a webm blob (high compression)
|
||||||
function convertAudioToCompressedBase64(inputBase64, onDone) {
|
function convertAudioToCompressedBase64(inputBase64, onDone) {
|
||||||
// Step 1: Convert base64 string to Blob
|
// Step 1: Convert base64 string to Blob
|
||||||
const matches = inputBase64.match(/^data:(audio\/[a-zA-Z0-9-]+);base64,(.+)$/);
|
const matches = inputBase64.match(/^data:(audio\/[a-zA-Z0-9-]+);base64,(.+)$/);
|
||||||
if (!matches) {
|
if (!matches) {
|
||||||
console.log("Convert Audio: Invalid base64 input");
|
console.log("Convert Audio: Invalid base64 input");
|
||||||
onDone(null);
|
onDone(null,null);
|
||||||
}
|
}
|
||||||
|
|
||||||
const mimeType = matches[1];
|
const mimeType = matches[1];
|
||||||
|
@ -18712,6 +18829,7 @@ Current version indicated by LITEVER below.
|
||||||
audioContext.decodeAudioData(arrayBuffer, function (buffer) {
|
audioContext.decodeAudioData(arrayBuffer, function (buffer) {
|
||||||
const samplefreq = buffer.sampleRate;
|
const samplefreq = buffer.sampleRate;
|
||||||
const samples = buffer.getChannelData(0); // mono
|
const samples = buffer.getChannelData(0); // mono
|
||||||
|
const durationInSeconds = buffer.duration;
|
||||||
const mp3encoder = new lamejs.Mp3Encoder(1, samplefreq, 40); // mono, 16kHz, 40kbps
|
const mp3encoder = new lamejs.Mp3Encoder(1, samplefreq, 40); // mono, 16kHz, 40kbps
|
||||||
const sampleBlockSize = 1152; //can be anything but make it a multiple of 576 to make encoders life easier
|
const sampleBlockSize = 1152; //can be anything but make it a multiple of 576 to make encoders life easier
|
||||||
let mp3Data = [];
|
let mp3Data = [];
|
||||||
|
@ -18737,12 +18855,12 @@ Current version indicated by LITEVER below.
|
||||||
const fileReader = new FileReader();
|
const fileReader = new FileReader();
|
||||||
fileReader.onloadend = function () {
|
fileReader.onloadend = function () {
|
||||||
const mp3Base64 = fileReader.result;
|
const mp3Base64 = fileReader.result;
|
||||||
onDone(mp3Base64);
|
onDone(mp3Base64,durationInSeconds);
|
||||||
};
|
};
|
||||||
fileReader.readAsDataURL(mp3Blob);
|
fileReader.readAsDataURL(mp3Blob);
|
||||||
}, function (err) {
|
}, function (err) {
|
||||||
console.log("Audio decode failed.");
|
console.log("Audio decode failed.");
|
||||||
onDone(null);
|
onDone(null,null);
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
reader.readAsArrayBuffer(inputBlob);
|
reader.readAsArrayBuffer(inputBlob);
|
||||||
|
@ -19574,6 +19692,7 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
|
|
||||||
var insertAIVisionImages = []; //concat gametext will populate this
|
var insertAIVisionImages = []; //concat gametext will populate this
|
||||||
|
var insertAIAudioSounds = [];
|
||||||
function concat_gametext(stripimg = false, stripimg_replace_str = "", append_before_segment="",append_after_segment="",escapeTxt=false,insertAIVision=false) {
|
function concat_gametext(stripimg = false, stripimg_replace_str = "", append_before_segment="",append_after_segment="",escapeTxt=false,insertAIVision=false) {
|
||||||
let fulltxt = "";
|
let fulltxt = "";
|
||||||
for (let i = 0; i < gametext_arr.length; ++i) {
|
for (let i = 0; i < gametext_arr.length; ++i) {
|
||||||
|
@ -19637,6 +19756,7 @@ Current version indicated by LITEVER below.
|
||||||
if(insertAIVision)
|
if(insertAIVision)
|
||||||
{
|
{
|
||||||
insertAIVisionImages = []; //a bit hacky
|
insertAIVisionImages = []; //a bit hacky
|
||||||
|
insertAIAudioSounds = [];
|
||||||
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
|
fulltxt = fulltxt.replace(/\[<\|d\|.+?\|d\|>\]/g, function (m) {
|
||||||
// m here means the whole matched string
|
// m here means the whole matched string
|
||||||
let inner = m.substring(5, m.length - 5);
|
let inner = m.substring(5, m.length - 5);
|
||||||
|
@ -19649,11 +19769,18 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
else if(foundmeta.visionmode==3)
|
else if(foundmeta.visionmode==3)
|
||||||
{
|
{
|
||||||
|
let placeholder = "";
|
||||||
let parts = inner.split(',');
|
let parts = inner.split(',');
|
||||||
if (parts.length === 2 && parts[0].startsWith('data:image')) {
|
if (parts.length === 2 && parts[0].startsWith('data:image')) {
|
||||||
insertAIVisionImages.push(parts[1]);
|
insertAIVisionImages.push(parts[1]);
|
||||||
|
placeholder = "\n(Attached Image)\n";
|
||||||
}
|
}
|
||||||
return "\n(Attached Image)\n";
|
else if(parts.length === 2 && parts[0].startsWith('data:audio'))
|
||||||
|
{
|
||||||
|
insertAIAudioSounds.push(parts[1]);
|
||||||
|
placeholder = "\n(Attached Audio)\n";
|
||||||
|
}
|
||||||
|
return placeholder;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return "";
|
return "";
|
||||||
|
@ -25798,11 +25925,12 @@ Current version indicated by LITEVER below.
|
||||||
<div class="popupbg flex"></div>
|
<div class="popupbg flex"></div>
|
||||||
<div class="nspopup flexsize highest">
|
<div class="nspopup flexsize highest">
|
||||||
<div class="popuptitlebar">
|
<div class="popuptitlebar">
|
||||||
<div class="popuptitletext">Image Information</div>
|
<div class="popuptitletext">Media Information</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="zoomedimgdiv">
|
<div id="zoomedimgdiv" class="zoomedimgdiv">
|
||||||
<img class="zoomedimg" id="zoomedimg" src="">
|
</div>
|
||||||
|
<div id="zoomedaudiodiv" class="zoomedimgdiv">
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="menutext zoomedimgdesc" id="zoomedimgdesc" style="word-wrap: break-word;">
|
<div class="menutext zoomedimgdesc" id="zoomedimgdesc" style="word-wrap: break-word;">
|
||||||
|
@ -25810,8 +25938,8 @@ Current version indicated by LITEVER below.
|
||||||
</div>
|
</div>
|
||||||
<br>
|
<br>
|
||||||
<div class="popupfooter">
|
<div class="popupfooter">
|
||||||
<button type="button" class="bg_red btn btn-primary" style="width: 124px;" onclick="delete_curr_image();hide_popups();">Delete Image</button>
|
<button type="button" class="bg_red btn btn-primary" style="width: 124px;" onclick="delete_curr_media();clear_zoomed_img_and_audio();hide_popups();">Delete Media</button>
|
||||||
<button type="button" class="btn btn-primary" onclick="hide_popups()">Close</button>
|
<button type="button" class="btn btn-primary" onclick="clear_zoomed_img_and_audio();hide_popups()">Close</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -454,7 +454,7 @@ bool audio_embd_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const whi
|
||||||
batch_f32.entries.push_back(std::move(mel_f32));
|
batch_f32.entries.push_back(std::move(mel_f32));
|
||||||
|
|
||||||
int n_mmproj_embd = clip_n_mmproj_embd(ctx_clip);
|
int n_mmproj_embd = clip_n_mmproj_embd(ctx_clip);
|
||||||
float * audio_embd = (float *)malloc(n_tokens * n_mmproj_embd);
|
float * audio_embd = (float *)malloc(n_tokens * n_mmproj_embd * sizeof(float));
|
||||||
bool ok = clip_image_batch_encode(
|
bool ok = clip_image_batch_encode(
|
||||||
ctx_clip,
|
ctx_clip,
|
||||||
n_threads,
|
n_threads,
|
||||||
|
@ -462,5 +462,5 @@ bool audio_embd_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const whi
|
||||||
audio_embd);
|
audio_embd);
|
||||||
*image_embd_out = audio_embd;
|
*image_embd_out = audio_embd;
|
||||||
*n_img_pos_out = n_tokens;
|
*n_img_pos_out = n_tokens;
|
||||||
return ok ? 0 : 1;
|
return ok;
|
||||||
}
|
}
|
|
@ -29,7 +29,7 @@
|
||||||
#define MA_API static
|
#define MA_API static
|
||||||
#include "miniaudio/miniaudio.h"
|
#include "miniaudio/miniaudio.h"
|
||||||
|
|
||||||
#define STB_IMAGE_IMPLEMENTATION
|
// #define STB_IMAGE_IMPLEMENTATION
|
||||||
#include "stb/stb_image.h"
|
#include "stb/stb_image.h"
|
||||||
|
|
||||||
#define LOG_INF(...) fprintf(stdout, __VA_ARGS__)
|
#define LOG_INF(...) fprintf(stdout, __VA_ARGS__)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue