mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
fixed smartcache bug when used with images
This commit is contained in:
parent
774841ffd6
commit
bfa2ae7744
3 changed files with 14 additions and 2 deletions
|
|
@ -4442,7 +4442,7 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
if (typeof obj === 'object') {
|
||||
const result = {};
|
||||
for (const key in obj) {
|
||||
for (let key in obj) {
|
||||
if (obj.hasOwnProperty(key)) {
|
||||
result[key] = replaceStringsInObject(obj[key], src, tgt);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4019,6 +4019,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
for(int i=0;i<savestate_limit;++i)
|
||||
{
|
||||
bool target_usable = FullyContainedPrefix(savestates[i].savestate_context_tokens,embd_inp);
|
||||
if(savestates[i].media_signature!=media_composite_image_signature)
|
||||
{
|
||||
target_usable = false;
|
||||
}
|
||||
int target_len = savestates[i].savestate_context_tokens.size();
|
||||
if(target_usable && target_len>bestlen)
|
||||
{
|
||||
|
|
@ -4085,6 +4089,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
for(int i=0;i<savestate_limit;++i)
|
||||
{
|
||||
float similaritybeat = ComputePrefixMatchPercent(savestates[i].savestate_context_tokens,embd_inp);
|
||||
if(savestates[i].media_signature!=media_composite_image_signature)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if(similaritybeat > similarity_threshold || (shiftable && CanContextShift(savestates[i].savestate_context_tokens, embd_inp, inputs.max_length, nctx)))
|
||||
{
|
||||
//found a match. save to the oldest slot thats not the one we are loading
|
||||
|
|
@ -5110,6 +5118,7 @@ size_t gpttype_save_state_kv(int slot)
|
|||
savestates[slot].savestate_context_tokens.clear();
|
||||
savestates[slot].current_savestate_size = 0;
|
||||
savestates[slot].current_draft_savestate_size = 0;
|
||||
savestates[slot].media_signature = "";
|
||||
}
|
||||
size_t newsize = llama_state_get_size(llama_ctx_v4);
|
||||
try {
|
||||
|
|
@ -5127,6 +5136,7 @@ size_t gpttype_save_state_kv(int slot)
|
|||
totalbytes += res;
|
||||
savestates[slot].current_savestate_size = newsize;
|
||||
savestates[slot].savestate_context_tokens = current_context_tokens;
|
||||
savestates[slot].media_signature = media_composite_image_signature;
|
||||
int maxedpos = llama_memory_seq_pos_max(llama_get_memory(llama_ctx_v4),0);
|
||||
if(maxedpos > 0 && savestates[slot].savestate_context_tokens.size() > maxedpos && savestates[slot].savestate_context_tokens.size()-maxedpos<=2)
|
||||
{
|
||||
|
|
@ -5211,6 +5221,7 @@ bool gpttype_clear_state_kv(bool shrink)
|
|||
}
|
||||
savestates[slot].savestate_context_tokens.clear();
|
||||
savestates[slot].current_savestate_size = 0;
|
||||
savestates[slot].media_signature = "";
|
||||
if(draft_ctx && savestates[slot].current_draft_savestate_size>0)
|
||||
{
|
||||
savestates[slot].current_draft_savestate_buffer.clear();
|
||||
|
|
@ -5240,7 +5251,7 @@ int get_identical_existing_slot() //returns slot number of slot containing exact
|
|||
int currctxsize = current_context_tokens.size();
|
||||
for(int i=0;i<savestate_limit;++i)
|
||||
{
|
||||
if(savestates[i].savestate_context_tokens.size() == currctxsize)
|
||||
if(savestates[i].savestate_context_tokens.size() == currctxsize && savestates[i].media_signature==media_composite_image_signature)
|
||||
{
|
||||
bool is_identical = true;
|
||||
const auto& slot_tokens = savestates[i].savestate_context_tokens;
|
||||
|
|
|
|||
|
|
@ -538,6 +538,7 @@ struct savestate_data
|
|||
std::vector<uint8_t> current_draft_savestate_buffer;
|
||||
std::vector<gpt_vocab::id> savestate_context_tokens; //for context clones
|
||||
int64_t last_used = 0; //unix timestamp, updated on save or load
|
||||
std::string media_signature = "";
|
||||
};
|
||||
|
||||
const float default_norm_eps = 1e-5f;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue