mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
save and load state upgraded to 3 available states
This commit is contained in:
parent
06d2bc3404
commit
736030bb9f
7 changed files with 206 additions and 82 deletions
16
expose.cpp
16
expose.cpp
|
@ -380,21 +380,21 @@ extern "C"
|
||||||
{
|
{
|
||||||
return gpttype_calc_new_state_tokencount();
|
return gpttype_calc_new_state_tokencount();
|
||||||
}
|
}
|
||||||
size_t calc_old_state_kv() //returns how much memory current savestate is using
|
size_t calc_old_state_kv(int slot) //returns how much memory current savestate is using
|
||||||
{
|
{
|
||||||
return gpttype_calc_old_state_kv();
|
return gpttype_calc_old_state_kv(slot);
|
||||||
}
|
}
|
||||||
size_t calc_old_state_tokencount()
|
size_t calc_old_state_tokencount(int slot)
|
||||||
{
|
{
|
||||||
return gpttype_calc_old_state_tokencount();
|
return gpttype_calc_old_state_tokencount(slot);
|
||||||
}
|
}
|
||||||
size_t save_state_kv() //triggers the save kv state of current ctx to memory
|
size_t save_state_kv(int slot) //triggers the save kv state of current ctx to memory
|
||||||
{
|
{
|
||||||
return gpttype_save_state_kv();
|
return gpttype_save_state_kv(slot);
|
||||||
}
|
}
|
||||||
bool load_state_kv() //triggers the load kv state of current ctx to memory
|
bool load_state_kv(int slot) //triggers the load kv state of current ctx to memory
|
||||||
{
|
{
|
||||||
return gpttype_load_state_kv();
|
return gpttype_load_state_kv(slot);
|
||||||
}
|
}
|
||||||
bool clear_state_kv()
|
bool clear_state_kv()
|
||||||
{
|
{
|
||||||
|
|
|
@ -142,9 +142,8 @@ static int delayed_generated_tokens_limit = 0;
|
||||||
std::deque<std::string> delayed_generated_tokens; //for use with antislop sampling
|
std::deque<std::string> delayed_generated_tokens; //for use with antislop sampling
|
||||||
static std::map<int,std::vector<int>> antislop_banned_token_ids; //first is the npast position, second is the array of banned ids at that index
|
static std::map<int,std::vector<int>> antislop_banned_token_ids; //first is the npast position, second is the array of banned ids at that index
|
||||||
|
|
||||||
static size_t current_savestate_size = 0;
|
const int savestate_limit = 3;
|
||||||
static std::vector<uint8_t> current_savestate_buffer;
|
static savestate_data savestates[savestate_limit];
|
||||||
static std::vector<gpt_vocab::id> savestate_context_tokens; //for context clones
|
|
||||||
|
|
||||||
inline int kcpp_cpu_has_blas(void) {
|
inline int kcpp_cpu_has_blas(void) {
|
||||||
#if defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_SYCL)
|
#if defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_SYCL)
|
||||||
|
@ -4327,19 +4326,19 @@ size_t gpttype_calc_new_state_kv()
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
size_t gpttype_calc_old_state_kv()
|
size_t gpttype_calc_old_state_kv(int slot)
|
||||||
{
|
{
|
||||||
return current_savestate_size;
|
return savestates[slot].current_savestate_size;
|
||||||
}
|
}
|
||||||
size_t gpttype_calc_old_state_tokencount()
|
size_t gpttype_calc_old_state_tokencount(int slot)
|
||||||
{
|
{
|
||||||
return savestate_context_tokens.size();
|
return savestates[slot].savestate_context_tokens.size();
|
||||||
}
|
}
|
||||||
size_t gpttype_calc_new_state_tokencount()
|
size_t gpttype_calc_new_state_tokencount()
|
||||||
{
|
{
|
||||||
return current_context_tokens.size();
|
return current_context_tokens.size();
|
||||||
}
|
}
|
||||||
size_t gpttype_save_state_kv()
|
size_t gpttype_save_state_kv(int slot)
|
||||||
{
|
{
|
||||||
if(kcpp_data==nullptr)
|
if(kcpp_data==nullptr)
|
||||||
{
|
{
|
||||||
|
@ -4347,30 +4346,34 @@ size_t gpttype_save_state_kv()
|
||||||
}
|
}
|
||||||
if(file_format == FileFormat::GGUF_GENERIC)
|
if(file_format == FileFormat::GGUF_GENERIC)
|
||||||
{
|
{
|
||||||
gpttype_clear_state_kv(false); //JIT free
|
if (!savestates[slot].current_savestate_buffer.empty()) { //JIT free
|
||||||
|
savestates[slot].current_savestate_buffer.clear();
|
||||||
|
savestates[slot].savestate_context_tokens.clear();
|
||||||
|
savestates[slot].current_savestate_size = 0;
|
||||||
|
}
|
||||||
size_t newsize = llama_state_get_size(llama_ctx_v4);
|
size_t newsize = llama_state_get_size(llama_ctx_v4);
|
||||||
try {
|
try {
|
||||||
if (current_savestate_buffer.capacity() < newsize + 512) {
|
if (savestates[slot].current_savestate_buffer.capacity() < newsize + 512) {
|
||||||
current_savestate_buffer = std::vector<uint8_t>(newsize + 512);
|
savestates[slot].current_savestate_buffer = std::vector<uint8_t>(newsize + 512);
|
||||||
} else {
|
} else {
|
||||||
current_savestate_buffer.resize(newsize + 512);
|
savestates[slot].current_savestate_buffer.resize(newsize + 512);
|
||||||
}
|
}
|
||||||
current_savestate_buffer.resize(newsize + 512); // add some padding. May throw std::bad_alloc
|
savestates[slot].current_savestate_buffer.resize(newsize + 512); // add some padding. May throw std::bad_alloc
|
||||||
} catch (const std::bad_alloc&) {
|
} catch (const std::bad_alloc&) {
|
||||||
fprintf(stderr, "KV Save State: Failed to allocate %zu bytes.\n", newsize + 512);
|
fprintf(stderr, "KV Save State: Failed to allocate %zu bytes.\n", newsize + 512);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
auto res = llama_state_get_data(llama_ctx_v4, current_savestate_buffer.data(), newsize);
|
auto res = llama_state_get_data(llama_ctx_v4, savestates[slot].current_savestate_buffer.data(), newsize);
|
||||||
if (res > 0) {
|
if (res > 0) {
|
||||||
current_savestate_size = newsize;
|
savestates[slot].current_savestate_size = newsize;
|
||||||
savestate_context_tokens = current_context_tokens;
|
savestates[slot].savestate_context_tokens = current_context_tokens;
|
||||||
printf("\nKV Save State: Created SaveState of %zu tokens, costing %zu MB.\n",current_context_tokens.size(),current_savestate_size/(1024*1024));
|
printf("\nKV Save State %d: Created SaveState of %zu tokens, costing %zu MB.\n",slot,current_context_tokens.size(),savestates[slot].current_savestate_size/(1024*1024));
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
bool gpttype_load_state_kv()
|
bool gpttype_load_state_kv(int slot)
|
||||||
{
|
{
|
||||||
if(kcpp_data==nullptr)
|
if(kcpp_data==nullptr)
|
||||||
{
|
{
|
||||||
|
@ -4378,14 +4381,14 @@ bool gpttype_load_state_kv()
|
||||||
}
|
}
|
||||||
if(file_format == FileFormat::GGUF_GENERIC)
|
if(file_format == FileFormat::GGUF_GENERIC)
|
||||||
{
|
{
|
||||||
if (current_savestate_buffer.empty()) {
|
if (savestates[slot].current_savestate_buffer.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
auto res = llama_state_set_data(llama_ctx_v4, current_savestate_buffer.data(), current_savestate_size);
|
auto res = llama_state_set_data(llama_ctx_v4, savestates[slot].current_savestate_buffer.data(), savestates[slot].current_savestate_size);
|
||||||
if(res > 0)
|
if(res > 0)
|
||||||
{
|
{
|
||||||
current_context_tokens = savestate_context_tokens;
|
current_context_tokens = savestates[slot].savestate_context_tokens;
|
||||||
printf("\nKV Load SaveState: Restored KV with %zu tokens.\n",current_context_tokens.size());
|
printf("\nKV Load SaveState %d: Restored KV with %zu tokens.\n", slot,current_context_tokens.size());
|
||||||
}
|
}
|
||||||
return (res > 0);
|
return (res > 0);
|
||||||
}
|
}
|
||||||
|
@ -4399,18 +4402,20 @@ bool gpttype_clear_state_kv(bool shrink)
|
||||||
}
|
}
|
||||||
if(file_format == FileFormat::GGUF_GENERIC)
|
if(file_format == FileFormat::GGUF_GENERIC)
|
||||||
{
|
{
|
||||||
if (!current_savestate_buffer.empty()) {
|
for(int slot=0;slot<savestate_limit;++slot)
|
||||||
printf("\nKV Clear SaveState: Freed %zu MB.\n", current_savestate_size / (1024 * 1024));
|
{
|
||||||
current_savestate_buffer.clear();
|
if (!savestates[slot].current_savestate_buffer.empty()) {
|
||||||
|
printf("\nKV Clear SaveState %d: Freed %zu MB.\n",slot, savestates[slot].current_savestate_size / (1024 * 1024));
|
||||||
|
savestates[slot].current_savestate_buffer.clear();
|
||||||
if(shrink)
|
if(shrink)
|
||||||
{
|
{
|
||||||
current_savestate_buffer.shrink_to_fit();
|
savestates[slot].current_savestate_buffer.shrink_to_fit();
|
||||||
|
}
|
||||||
|
savestates[slot].savestate_context_tokens.clear();
|
||||||
|
savestates[slot].current_savestate_size = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
savestate_context_tokens.clear();
|
|
||||||
current_savestate_size = 0;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1909,8 +1909,20 @@
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"example": {
|
"example": {
|
||||||
"success": true,
|
"success": true,
|
||||||
"old_state_size": 0,
|
"old_states": [
|
||||||
"old_tokens": 0,
|
{
|
||||||
|
"tokens": 0,
|
||||||
|
"size": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tokens": 0,
|
||||||
|
"size": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tokens": 0,
|
||||||
|
"size": 0
|
||||||
|
}
|
||||||
|
],
|
||||||
"new_state_size": 0,
|
"new_state_size": 0,
|
||||||
"new_tokens": 0,
|
"new_tokens": 0,
|
||||||
},
|
},
|
||||||
|
@ -1920,13 +1932,21 @@
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"description": "Whether the operation was successful."
|
"description": "Whether the operation was successful."
|
||||||
},
|
},
|
||||||
"old_state_size": {
|
"old_states": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"tokens": {
|
||||||
"type": "number",
|
"type": "number",
|
||||||
"description": "Bytes currently in used for existing save state."
|
"description": "Tokens in this saved state."
|
||||||
},
|
},
|
||||||
"old_tokens": {
|
"size": {
|
||||||
"type": "number",
|
"type": "number",
|
||||||
"description": "How many tokens in currently existing save state."
|
"description": "Size of this saved state in bytes."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"new_state_size": {
|
"new_state_size": {
|
||||||
"type": "number",
|
"type": "number",
|
||||||
|
@ -1952,6 +1972,25 @@
|
||||||
"/api/admin/save_state": {
|
"/api/admin/save_state": {
|
||||||
"post": {
|
"post": {
|
||||||
"description": "Creates a new KV cache save state in memory. Overwrites any existing saved state.",
|
"description": "Creates a new KV cache save state in memory. Overwrites any existing saved state.",
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"example": {
|
||||||
|
"slot": 0,
|
||||||
|
},
|
||||||
|
"schema": {
|
||||||
|
"properties": {
|
||||||
|
"slot": {
|
||||||
|
"type": "number",
|
||||||
|
"description": "Which slot index to save/load the state to/from."
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
},
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -1991,6 +2030,25 @@
|
||||||
"/api/admin/load_state": {
|
"/api/admin/load_state": {
|
||||||
"post": {
|
"post": {
|
||||||
"description": "Reloads a previous KV cache save state into context.",
|
"description": "Reloads a previous KV cache save state into context.",
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"example": {
|
||||||
|
"slot": 0,
|
||||||
|
},
|
||||||
|
"schema": {
|
||||||
|
"properties": {
|
||||||
|
"slot": {
|
||||||
|
"type": "number",
|
||||||
|
"description": "Which slot index to save/load the state to/from."
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
},
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -2024,7 +2082,7 @@
|
||||||
},
|
},
|
||||||
"/api/admin/clear_state": {
|
"/api/admin/clear_state": {
|
||||||
"post": {
|
"post": {
|
||||||
"description": "Frees any previous KV cache save state.",
|
"description": "Frees all previous KV cache save state.",
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
"content": {
|
"content": {
|
||||||
|
@ -2045,7 +2103,7 @@
|
||||||
"description": "Successful request"
|
"description": "Successful request"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"summary": "Frees any previous KV cache save state.",
|
"summary": "Frees all previous KV cache save state.",
|
||||||
"tags": [
|
"tags": [
|
||||||
"api/admin"
|
"api/admin"
|
||||||
]
|
]
|
||||||
|
|
58
klite.embd
58
klite.embd
|
@ -11213,7 +11213,8 @@ Current version indicated by LITEVER below.
|
||||||
|
|
||||||
function trigger_admin_savestate()
|
function trigger_admin_savestate()
|
||||||
{
|
{
|
||||||
document.getElementById("loadstatetxt").innerText = "Saving State...";
|
let slot = parseInt(document.getElementById("savestate_selection").value);
|
||||||
|
document.getElementById("loadstatetxt").innerText = `Saving State ${slot}...`;
|
||||||
let header = {'Content-Type': 'application/json'};
|
let header = {'Content-Type': 'application/json'};
|
||||||
if(last_admin_key!="")
|
if(last_admin_key!="")
|
||||||
{
|
{
|
||||||
|
@ -11221,27 +11222,31 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
fetch(custom_kobold_endpoint + koboldcpp_admin_savestate_endpoint, {
|
fetch(custom_kobold_endpoint + koboldcpp_admin_savestate_endpoint, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: header
|
headers: header,
|
||||||
|
body: JSON.stringify({
|
||||||
|
"slot": slot
|
||||||
|
})
|
||||||
})
|
})
|
||||||
.then(x => x.json())
|
.then(x => x.json())
|
||||||
.then(values => {
|
.then(values => {
|
||||||
console.log(values);
|
console.log(values);
|
||||||
if(values.success)
|
if(values.success)
|
||||||
{
|
{
|
||||||
document.getElementById("loadstatetxt").innerText = `State Saved (${values.new_tokens} tokens in ${parseInt(values.new_state_size/(1024*1024))} MB)`;
|
document.getElementById("loadstatetxt").innerText = `State ${slot} Saved (${values.new_tokens} tokens in ${parseInt(values.new_state_size/(1024*1024))} MB)`;
|
||||||
}else{
|
}else{
|
||||||
document.getElementById("loadstatetxt").innerText = `Save State Failed!`;
|
document.getElementById("loadstatetxt").innerText = `Save State ${slot} Failed!`;
|
||||||
}
|
}
|
||||||
}).catch((error) => {
|
}).catch((error) => {
|
||||||
console.log("Error: " + error);
|
console.log("Error: " + error);
|
||||||
document.getElementById("loadstatetxt").innerText = `Save State Failed!`;
|
document.getElementById("loadstatetxt").innerText = `Save State ${slot} Failed!`;
|
||||||
msgbox(error,"Error");
|
msgbox(error,"Error");
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function trigger_admin_loadstate()
|
function trigger_admin_loadstate()
|
||||||
{
|
{
|
||||||
document.getElementById("loadstatetxt").innerText = "Loading State...";
|
let slot = parseInt(document.getElementById("savestate_selection").value);
|
||||||
|
document.getElementById("loadstatetxt").innerText = `Loading State ${slot}...`;
|
||||||
let header = {'Content-Type': 'application/json'};
|
let header = {'Content-Type': 'application/json'};
|
||||||
if(last_admin_key!="")
|
if(last_admin_key!="")
|
||||||
{
|
{
|
||||||
|
@ -11249,20 +11254,23 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
fetch(custom_kobold_endpoint + koboldcpp_admin_loadstate_endpoint, {
|
fetch(custom_kobold_endpoint + koboldcpp_admin_loadstate_endpoint, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: header
|
headers: header,
|
||||||
|
body: JSON.stringify({
|
||||||
|
"slot": slot
|
||||||
|
})
|
||||||
})
|
})
|
||||||
.then(x => x.json())
|
.then(x => x.json())
|
||||||
.then(values => {
|
.then(values => {
|
||||||
console.log(values);
|
console.log(values);
|
||||||
if(values.success)
|
if(values.success)
|
||||||
{
|
{
|
||||||
document.getElementById("loadstatetxt").innerText = `State Loaded (${values.new_tokens} tokens)`;
|
document.getElementById("loadstatetxt").innerText = `State ${slot} Loaded (${values.new_tokens} tokens)`;
|
||||||
}else{
|
}else{
|
||||||
document.getElementById("loadstatetxt").innerText = `Load State Failed!`;
|
document.getElementById("loadstatetxt").innerText = `Load State ${slot} Failed!`;
|
||||||
}
|
}
|
||||||
}).catch((error) => {
|
}).catch((error) => {
|
||||||
console.log("Error: " + error);
|
console.log("Error: " + error);
|
||||||
document.getElementById("loadstatetxt").innerText = `Load State Failed!`;
|
document.getElementById("loadstatetxt").innerText = `Load State ${slot} Failed!`;
|
||||||
msgbox(error,"Error");
|
msgbox(error,"Error");
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -17649,6 +17657,14 @@ Current version indicated by LITEVER below.
|
||||||
let pat = new RegExp(localsettings.thinking_pattern, "gmi");
|
let pat = new RegExp(localsettings.thinking_pattern, "gmi");
|
||||||
gentxtspeak = gentxtspeak.replace(pat, '');
|
gentxtspeak = gentxtspeak.replace(pat, '');
|
||||||
}
|
}
|
||||||
|
//remove t2i
|
||||||
|
if (localsettings.img_autogen_type == 2)
|
||||||
|
{
|
||||||
|
const pat = /<t2i>(.*?)<\/t2i>/g;
|
||||||
|
gentxtspeak = gentxtspeak.replace(pat, "");
|
||||||
|
const pat2 = /{{\[IMG_.{1,8}_REF\]}}/g;
|
||||||
|
gentxtspeak = gentxtspeak.replace(pat2, "");
|
||||||
|
}
|
||||||
|
|
||||||
tts_speak(gentxtspeak);
|
tts_speak(gentxtspeak);
|
||||||
}
|
}
|
||||||
|
@ -21185,8 +21201,10 @@ Current version indicated by LITEVER below.
|
||||||
let userinput = getInputBoxValue().trim();
|
let userinput = getInputBoxValue().trim();
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if(userinput!="")
|
if(userinput=="")
|
||||||
{
|
{
|
||||||
|
userinput = "[]";
|
||||||
|
}
|
||||||
let newjson = JSON.parse(userinput);
|
let newjson = JSON.parse(userinput);
|
||||||
pending_wi_obj = pending_wi_obj.filter(item => !currwis.includes(item));
|
pending_wi_obj = pending_wi_obj.filter(item => !currwis.includes(item));
|
||||||
for (var i = 0; i < newjson.length; ++i) {
|
for (var i = 0; i < newjson.length; ++i) {
|
||||||
|
@ -21194,7 +21212,7 @@ Current version indicated by LITEVER below.
|
||||||
pending_wi_obj.push(newjson[i]);
|
pending_wi_obj.push(newjson[i]);
|
||||||
}
|
}
|
||||||
update_wi();
|
update_wi();
|
||||||
}
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log("WI JSON not correctly formatted!");
|
console.log("WI JSON not correctly formatted!");
|
||||||
}
|
}
|
||||||
|
@ -21222,6 +21240,13 @@ Current version indicated by LITEVER below.
|
||||||
if(has_tav_wi_check)
|
if(has_tav_wi_check)
|
||||||
{
|
{
|
||||||
wiToAdd = load_tavern_wi(wiToAdd);
|
wiToAdd = load_tavern_wi(wiToAdd);
|
||||||
|
if(wiToAdd && wiToAdd.length > 0)
|
||||||
|
{
|
||||||
|
for(let i=0;i<wiToAdd.length;++i)
|
||||||
|
{
|
||||||
|
wiToAdd[i].wigroup = curr_wi_tab;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (wiToAdd && wiToAdd.length > 0)
|
if (wiToAdd && wiToAdd.length > 0)
|
||||||
{
|
{
|
||||||
|
@ -24789,8 +24814,13 @@ Current version indicated by LITEVER below.
|
||||||
<div>
|
<div>
|
||||||
<b class="color_white" style="padding: 5px;">Save / Load Context State:</b><br>
|
<b class="color_white" style="padding: 5px;">Save / Load Context State:</b><br>
|
||||||
<div style="display:flex;padding: 5px;">
|
<div style="display:flex;padding: 5px;">
|
||||||
<button type="button" style="margin:2px;width:50%" class="btn btn-primary" onclick="trigger_admin_savestate()">Save State</button>
|
<select title="State Slot Selection" style="padding:4px;width:30%" class="form-control" id="savestate_selection">
|
||||||
<button type="button" style="margin:2px;width:50%" class="btn btn-primary" onclick="trigger_admin_loadstate()">Load State</button>
|
<option value="0" selected="selected">State 0</option>
|
||||||
|
<option value="1">State 1</option>
|
||||||
|
<option value="2">State 2</option>
|
||||||
|
</select>
|
||||||
|
<button type="button" style="margin:2px;width:35%" class="btn btn-primary" onclick="trigger_admin_savestate()">Save State</button>
|
||||||
|
<button type="button" style="margin:2px;width:35%" class="btn btn-primary" onclick="trigger_admin_loadstate()">Load State</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="menutext" id="loadstatetxt"></div>
|
<div class="menutext" id="loadstatetxt"></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
38
koboldcpp.py
38
koboldcpp.py
|
@ -44,6 +44,7 @@ default_draft_amount = 8
|
||||||
default_ttsmaxlen = 4096
|
default_ttsmaxlen = 4096
|
||||||
default_visionmaxres = 1024
|
default_visionmaxres = 1024
|
||||||
net_save_slots = 10
|
net_save_slots = 10
|
||||||
|
savestate_limit = 3 #3 savestate slots
|
||||||
|
|
||||||
# abuse prevention
|
# abuse prevention
|
||||||
stop_token_max = 256
|
stop_token_max = 256
|
||||||
|
@ -522,10 +523,14 @@ def init_library():
|
||||||
handle.get_pending_output.restype = ctypes.c_char_p
|
handle.get_pending_output.restype = ctypes.c_char_p
|
||||||
handle.get_chat_template.restype = ctypes.c_char_p
|
handle.get_chat_template.restype = ctypes.c_char_p
|
||||||
handle.calc_new_state_kv.restype = ctypes.c_size_t
|
handle.calc_new_state_kv.restype = ctypes.c_size_t
|
||||||
handle.calc_old_state_kv.restype = ctypes.c_size_t
|
|
||||||
handle.calc_new_state_tokencount.restype = ctypes.c_size_t
|
handle.calc_new_state_tokencount.restype = ctypes.c_size_t
|
||||||
|
handle.calc_old_state_kv.argtypes = [ctypes.c_int]
|
||||||
|
handle.calc_old_state_kv.restype = ctypes.c_size_t
|
||||||
|
handle.calc_old_state_tokencount.argtypes = [ctypes.c_int]
|
||||||
handle.calc_old_state_tokencount.restype = ctypes.c_size_t
|
handle.calc_old_state_tokencount.restype = ctypes.c_size_t
|
||||||
|
handle.save_state_kv.argtypes = [ctypes.c_int]
|
||||||
handle.save_state_kv.restype = ctypes.c_size_t
|
handle.save_state_kv.restype = ctypes.c_size_t
|
||||||
|
handle.load_state_kv.argtypes = [ctypes.c_int]
|
||||||
handle.load_state_kv.restype = ctypes.c_bool
|
handle.load_state_kv.restype = ctypes.c_bool
|
||||||
handle.clear_state_kv.restype = ctypes.c_bool
|
handle.clear_state_kv.restype = ctypes.c_bool
|
||||||
handle.sd_load_model.argtypes = [sd_load_model_inputs]
|
handle.sd_load_model.argtypes = [sd_load_model_inputs]
|
||||||
|
@ -3524,23 +3529,42 @@ Change Mode<br>
|
||||||
|
|
||||||
if self.path.endswith('/api/admin/check_state'):
|
if self.path.endswith('/api/admin/check_state'):
|
||||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||||
|
cur_states = []
|
||||||
|
for sl in range(savestate_limit): #0,1,2
|
||||||
|
oldstate = handle.calc_old_state_kv(sl)
|
||||||
|
oldtokencnt = handle.calc_old_state_tokencount(sl)
|
||||||
|
cur_states.append({"tokens":oldtokencnt,"size":oldstate})
|
||||||
newstate = handle.calc_new_state_kv()
|
newstate = handle.calc_new_state_kv()
|
||||||
oldstate = handle.calc_old_state_kv()
|
|
||||||
newtokencnt = handle.calc_new_state_tokencount()
|
newtokencnt = handle.calc_new_state_tokencount()
|
||||||
oldtokencnt = handle.calc_old_state_tokencount()
|
response_body = (json.dumps({"success": True, "old_states":cur_states, "new_state_size":newstate, "new_tokens":newtokencnt}).encode())
|
||||||
response_body = (json.dumps({"success": True, "old_state_size":oldstate, "old_tokens":oldtokencnt, "new_state_size":newstate, "new_tokens":newtokencnt}).encode())
|
|
||||||
else:
|
else:
|
||||||
response_body = (json.dumps({"success": False, "old_state_size":0, "old_tokens":0, "new_state_size":0, "new_tokens":0}).encode())
|
response_body = (json.dumps({"success": False, "old_states":[], "new_state_size":0, "new_tokens":0}).encode())
|
||||||
elif self.path.endswith('/api/admin/load_state'):
|
elif self.path.endswith('/api/admin/load_state'):
|
||||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||||
result = handle.load_state_kv()
|
targetslot = 0
|
||||||
|
try:
|
||||||
|
tempbody = json.loads(body)
|
||||||
|
if isinstance(tempbody, dict):
|
||||||
|
targetslot = tempbody.get('slot', 0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
targetslot = (targetslot if targetslot<savestate_limit else 0)
|
||||||
|
result = handle.load_state_kv(targetslot)
|
||||||
tokencnt = handle.calc_new_state_tokencount()
|
tokencnt = handle.calc_new_state_tokencount()
|
||||||
response_body = (json.dumps({"success": result, "new_tokens":tokencnt}).encode())
|
response_body = (json.dumps({"success": result, "new_tokens":tokencnt}).encode())
|
||||||
else:
|
else:
|
||||||
response_body = (json.dumps({"success": False, "new_tokens":0}).encode())
|
response_body = (json.dumps({"success": False, "new_tokens":0}).encode())
|
||||||
elif self.path.endswith('/api/admin/save_state'):
|
elif self.path.endswith('/api/admin/save_state'):
|
||||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||||
result = handle.save_state_kv()
|
targetslot = 0
|
||||||
|
try:
|
||||||
|
tempbody = json.loads(body)
|
||||||
|
if isinstance(tempbody, dict):
|
||||||
|
targetslot = tempbody.get('slot', 0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
targetslot = (targetslot if targetslot<savestate_limit else 0)
|
||||||
|
result = handle.save_state_kv(targetslot)
|
||||||
tokencnt = handle.calc_new_state_tokencount()
|
tokencnt = handle.calc_new_state_tokencount()
|
||||||
response_body = (json.dumps({"success": (result>0), "new_state_size":result, "new_tokens":tokencnt}).encode())
|
response_body = (json.dumps({"success": (result>0), "new_state_size":result, "new_tokens":tokencnt}).encode())
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -131,8 +131,8 @@ void ContextFastForward(std::vector<int> ¤t_context_tokens, std::vector<in
|
||||||
|
|
||||||
size_t gpttype_calc_new_state_kv();
|
size_t gpttype_calc_new_state_kv();
|
||||||
size_t gpttype_calc_new_state_tokencount();
|
size_t gpttype_calc_new_state_tokencount();
|
||||||
size_t gpttype_calc_old_state_kv();
|
size_t gpttype_calc_old_state_kv(int slot);
|
||||||
size_t gpttype_calc_old_state_tokencount();
|
size_t gpttype_calc_old_state_tokencount(int slot);
|
||||||
size_t gpttype_save_state_kv();
|
size_t gpttype_save_state_kv(int slot);
|
||||||
bool gpttype_load_state_kv();
|
bool gpttype_load_state_kv(int slot);
|
||||||
bool gpttype_clear_state_kv(bool shrink);
|
bool gpttype_clear_state_kv(bool shrink);
|
|
@ -517,4 +517,11 @@ struct speculative_draft_result
|
||||||
int drafted_amount = 0;
|
int drafted_amount = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct savestate_data
|
||||||
|
{
|
||||||
|
size_t current_savestate_size = 0;
|
||||||
|
std::vector<uint8_t> current_savestate_buffer;
|
||||||
|
std::vector<gpt_vocab::id> savestate_context_tokens; //for context clones
|
||||||
|
};
|
||||||
|
|
||||||
const float default_norm_eps = 1e-5f;
|
const float default_norm_eps = 1e-5f;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue