Increase max images; if too many images are supplied, take them from the end instead of the beginning

This commit is contained in:
Concedo 2026-03-26 23:03:52 +08:00
parent 4a5c903718
commit c91f350ed5
4 changed files with 27 additions and 22 deletions

View file

@ -13979,9 +13979,9 @@ Current version indicated by LITEVER below.
},()=>{
});
}
else if(localflag && koboldcpp_has_musicgen && no_txt_model)
else if(localflag && (koboldcpp_has_musicgen||koboldcpp_has_tts) && no_txt_model)
{
msgboxYesNo("This KoboldCpp instance seems to be running an Music Generation model without any Text Generation model loaded.\n\nWould you like to launch MusicUI (Dedicated Music Generation WebUI bundled with KoboldCpp)?\n\nIf unsure, select 'Yes'.","Launch MusicUI?", ()=>{
msgboxYesNo("This KoboldCpp instance seems to be running an Music or Speech Generation model without any Text Generation model loaded.\n\nWould you like to launch MusicUI (Dedicated Audio Generation WebUI bundled with KoboldCpp)?\n\nIf unsure, select 'Yes'.","Launch MusicUI?", ()=>{
go_to_musicui();
},()=>{
});

View file

@ -2,8 +2,6 @@
#include <cstdint>
const int tensor_split_max = 16;
const int images_max = 8;
const int audio_max = 4;
const int logprobs_max = 10;
const int overridekv_max = 16;
@ -91,8 +89,10 @@ struct generation_inputs
const char * memory = nullptr;
const char * negative_prompt = nullptr;
const float guidance_scale = 1;
const char * images[images_max] = {};
const char * audio[audio_max] = {};
const int images_len = 0;
const char ** images = nullptr;
const int audio_len = 0;
const char ** audio = nullptr;
const int max_context_length = 0;
const int max_length = 0;
const float temperature = 0.0f;

View file

@ -3648,7 +3648,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
}
media_objects.clear();
std::string new_media_composite = "";
for(int x=0;x<images_max;++x)
for(int x=0;x<inputs.images_len;++x)
{
std::string item = inputs.images[x];
if(item!="")
@ -3678,7 +3679,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
new_media_composite += item;
}
}
for(int x=0;x<audio_max;++x)
for(int x=0;x<inputs.audio_len;++x)
{
std::string item = inputs.audio[x];
if(item!="")

View file

@ -45,8 +45,8 @@ import queue
# constants
sampler_order_max = 7
tensor_split_max = 16
images_max = 8
audio_max = 4
images_max = 16
audio_max = 16
bias_min_value = -100.0
bias_max_value = 100.0
logprobs_max = 10
@ -251,8 +251,10 @@ class generation_inputs(ctypes.Structure):
("memory", ctypes.c_char_p),
("negative_prompt", ctypes.c_char_p),
("guidance_scale", ctypes.c_float),
("images", ctypes.c_char_p * images_max),
("audio", ctypes.c_char_p * audio_max),
("images_len", ctypes.c_int),
("images", ctypes.POINTER(ctypes.c_char_p)),
("audio_len", ctypes.c_int),
("audio", ctypes.POINTER(ctypes.c_char_p)),
("max_context_length", ctypes.c_int),
("max_length", ctypes.c_int),
("temperature", ctypes.c_float),
@ -1898,16 +1900,18 @@ def generate(genparams, stream_flag=False):
inputs.memory = memory.encode("UTF-8")
inputs.negative_prompt = negative_prompt.encode("UTF-8")
inputs.guidance_scale = guidance_scale
for n in range(images_max):
if not images or n >= len(images):
inputs.images[n] = "".encode("UTF-8")
else:
inputs.images[n] = images[n].encode("UTF-8")
for n in range(audio_max):
if not audio or n >= len(audio):
inputs.audio[n] = "".encode("UTF-8")
else:
inputs.audio[n] = audio[n].encode("UTF-8")
images = images[-images_max:]
inputs.images_len = len(images)
inputs.images = (ctypes.c_char_p * inputs.images_len)()
for n, item in enumerate(images):
inputs.images[n] = item.encode("UTF-8")
audio = audio[-audio_max:]
inputs.audio_len = len(audio)
inputs.audio = (ctypes.c_char_p * inputs.audio_len)()
for n, item in enumerate(audio):
inputs.audio[n] = item.encode("UTF-8")
global showmaxctxwarning
if max_context_length > maxctx:
if showmaxctxwarning: