mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-19 16:31:59 +00:00
Increase max images; if too many images are supplied, take images from the end instead of the beginning
This commit is contained in:
parent
4a5c903718
commit
c91f350ed5
4 changed files with 27 additions and 22 deletions
|
|
@ -13979,9 +13979,9 @@ Current version indicated by LITEVER below.
|
|||
},()=>{
|
||||
});
|
||||
}
|
||||
else if(localflag && koboldcpp_has_musicgen && no_txt_model)
|
||||
else if(localflag && (koboldcpp_has_musicgen||koboldcpp_has_tts) && no_txt_model)
|
||||
{
|
||||
msgboxYesNo("This KoboldCpp instance seems to be running an Music Generation model without any Text Generation model loaded.\n\nWould you like to launch MusicUI (Dedicated Music Generation WebUI bundled with KoboldCpp)?\n\nIf unsure, select 'Yes'.","Launch MusicUI?", ()=>{
|
||||
msgboxYesNo("This KoboldCpp instance seems to be running an Music or Speech Generation model without any Text Generation model loaded.\n\nWould you like to launch MusicUI (Dedicated Audio Generation WebUI bundled with KoboldCpp)?\n\nIf unsure, select 'Yes'.","Launch MusicUI?", ()=>{
|
||||
go_to_musicui();
|
||||
},()=>{
|
||||
});
|
||||
|
|
|
|||
8
expose.h
8
expose.h
|
|
@ -2,8 +2,6 @@
|
|||
#include <cstdint>
|
||||
|
||||
const int tensor_split_max = 16;
|
||||
const int images_max = 8;
|
||||
const int audio_max = 4;
|
||||
const int logprobs_max = 10;
|
||||
const int overridekv_max = 16;
|
||||
|
||||
|
|
@ -91,8 +89,10 @@ struct generation_inputs
|
|||
const char * memory = nullptr;
|
||||
const char * negative_prompt = nullptr;
|
||||
const float guidance_scale = 1;
|
||||
const char * images[images_max] = {};
|
||||
const char * audio[audio_max] = {};
|
||||
const int images_len = 0;
|
||||
const char ** images = nullptr;
|
||||
const int audio_len = 0;
|
||||
const char ** audio = nullptr;
|
||||
const int max_context_length = 0;
|
||||
const int max_length = 0;
|
||||
const float temperature = 0.0f;
|
||||
|
|
|
|||
|
|
@ -3648,7 +3648,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
}
|
||||
media_objects.clear();
|
||||
std::string new_media_composite = "";
|
||||
for(int x=0;x<images_max;++x)
|
||||
|
||||
for(int x=0;x<inputs.images_len;++x)
|
||||
{
|
||||
std::string item = inputs.images[x];
|
||||
if(item!="")
|
||||
|
|
@ -3678,7 +3679,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
new_media_composite += item;
|
||||
}
|
||||
}
|
||||
for(int x=0;x<audio_max;++x)
|
||||
for(int x=0;x<inputs.audio_len;++x)
|
||||
{
|
||||
std::string item = inputs.audio[x];
|
||||
if(item!="")
|
||||
|
|
|
|||
32
koboldcpp.py
32
koboldcpp.py
|
|
@ -45,8 +45,8 @@ import queue
|
|||
# constants
|
||||
sampler_order_max = 7
|
||||
tensor_split_max = 16
|
||||
images_max = 8
|
||||
audio_max = 4
|
||||
images_max = 16
|
||||
audio_max = 16
|
||||
bias_min_value = -100.0
|
||||
bias_max_value = 100.0
|
||||
logprobs_max = 10
|
||||
|
|
@ -251,8 +251,10 @@ class generation_inputs(ctypes.Structure):
|
|||
("memory", ctypes.c_char_p),
|
||||
("negative_prompt", ctypes.c_char_p),
|
||||
("guidance_scale", ctypes.c_float),
|
||||
("images", ctypes.c_char_p * images_max),
|
||||
("audio", ctypes.c_char_p * audio_max),
|
||||
("images_len", ctypes.c_int),
|
||||
("images", ctypes.POINTER(ctypes.c_char_p)),
|
||||
("audio_len", ctypes.c_int),
|
||||
("audio", ctypes.POINTER(ctypes.c_char_p)),
|
||||
("max_context_length", ctypes.c_int),
|
||||
("max_length", ctypes.c_int),
|
||||
("temperature", ctypes.c_float),
|
||||
|
|
@ -1898,16 +1900,18 @@ def generate(genparams, stream_flag=False):
|
|||
inputs.memory = memory.encode("UTF-8")
|
||||
inputs.negative_prompt = negative_prompt.encode("UTF-8")
|
||||
inputs.guidance_scale = guidance_scale
|
||||
for n in range(images_max):
|
||||
if not images or n >= len(images):
|
||||
inputs.images[n] = "".encode("UTF-8")
|
||||
else:
|
||||
inputs.images[n] = images[n].encode("UTF-8")
|
||||
for n in range(audio_max):
|
||||
if not audio or n >= len(audio):
|
||||
inputs.audio[n] = "".encode("UTF-8")
|
||||
else:
|
||||
inputs.audio[n] = audio[n].encode("UTF-8")
|
||||
|
||||
images = images[-images_max:]
|
||||
inputs.images_len = len(images)
|
||||
inputs.images = (ctypes.c_char_p * inputs.images_len)()
|
||||
for n, item in enumerate(images):
|
||||
inputs.images[n] = item.encode("UTF-8")
|
||||
audio = audio[-audio_max:]
|
||||
inputs.audio_len = len(audio)
|
||||
inputs.audio = (ctypes.c_char_p * inputs.audio_len)()
|
||||
for n, item in enumerate(audio):
|
||||
inputs.audio[n] = item.encode("UTF-8")
|
||||
|
||||
global showmaxctxwarning
|
||||
if max_context_length > maxctx:
|
||||
if showmaxctxwarning:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue