use a static buffer for kv reloads instead. also, added into lite ui

This commit is contained in:
Concedo 2025-06-03 22:32:46 +08:00
parent 4b57108508
commit 53f1511396
6 changed files with 239 additions and 99 deletions

View file

@ -376,11 +376,19 @@ extern "C"
{ {
return gpttype_calc_new_state_kv(); return gpttype_calc_new_state_kv();
} }
size_t calc_new_state_tokencount() //forwards to gpttype_calc_new_state_tokencount and hands its result back unchanged
{
    const size_t tokencount = gpttype_calc_new_state_tokencount();
    return tokencount;
}
size_t calc_old_state_kv() //returns how much memory current savestate is using size_t calc_old_state_kv() //returns how much memory current savestate is using
{ {
return gpttype_calc_old_state_kv(); return gpttype_calc_old_state_kv();
} }
bool save_state_kv() //triggers the save kv state of current ctx to memory size_t calc_old_state_tokencount()
{
return gpttype_calc_old_state_tokencount();
}
size_t save_state_kv() //triggers the save kv state of current ctx to memory
{ {
return gpttype_save_state_kv(); return gpttype_save_state_kv();
} }
@ -390,6 +398,6 @@ extern "C"
} }
bool clear_state_kv() bool clear_state_kv()
{ {
return gpttype_clear_state_kv(); return gpttype_clear_state_kv(true);
} }
} }

View file

@ -143,7 +143,7 @@ std::deque<std::string> delayed_generated_tokens; //for use with antislop sampli
static std::map<int,std::vector<int>> antislop_banned_token_ids; //first is the npast position, second is the array of banned ids at that index static std::map<int,std::vector<int>> antislop_banned_token_ids; //first is the npast position, second is the array of banned ids at that index
static size_t current_savestate_size = 0; static size_t current_savestate_size = 0;
uint8_t * current_savestate_ptr = nullptr; static std::vector<uint8_t> current_savestate_buffer;
static std::vector<gpt_vocab::id> savestate_context_tokens; //for context clones static std::vector<gpt_vocab::id> savestate_context_tokens; //for context clones
inline int kcpp_cpu_has_blas(void) { inline int kcpp_cpu_has_blas(void) {
@ -4331,30 +4331,44 @@ size_t gpttype_calc_old_state_kv()
{ {
return current_savestate_size; return current_savestate_size;
} }
bool gpttype_save_state_kv() size_t gpttype_calc_old_state_tokencount()
{
return savestate_context_tokens.size();
}
//Reports how many tokens are currently held in current_context_tokens.
size_t gpttype_calc_new_state_tokencount()
{
    const size_t tokencount = current_context_tokens.size();
    return tokencount;
}
size_t gpttype_save_state_kv()
{ {
if(kcpp_data==nullptr) if(kcpp_data==nullptr)
{ {
return false; return 0;
} }
if(file_format == FileFormat::GGUF_GENERIC) if(file_format == FileFormat::GGUF_GENERIC)
{ {
gpttype_clear_state_kv(); //JIT free gpttype_clear_state_kv(false); //JIT free
size_t newsize = llama_state_get_size(llama_ctx_v4); size_t newsize = llama_state_get_size(llama_ctx_v4);
current_savestate_ptr = (uint8_t *) malloc(newsize + 512); //add some padding try {
if(!current_savestate_ptr) if (current_savestate_buffer.capacity() < newsize + 512) {
{ current_savestate_buffer = std::vector<uint8_t>(newsize + 512);
return false; } else {
current_savestate_buffer.resize(newsize + 512);
} }
auto res = llama_state_get_data(llama_ctx_v4, current_savestate_ptr, newsize); current_savestate_buffer.resize(newsize + 512); // add some padding. May throw std::bad_alloc
} catch (const std::bad_alloc&) {
fprintf(stderr, "KV Save State: Failed to allocate %zu bytes.\n", newsize + 512);
return 0;
}
auto res = llama_state_get_data(llama_ctx_v4, current_savestate_buffer.data(), newsize);
if (res > 0) { if (res > 0) {
current_savestate_size = newsize; current_savestate_size = newsize;
savestate_context_tokens = current_context_tokens; savestate_context_tokens = current_context_tokens;
printf("\nKV Save State: Created SaveState of %zu tokens, costing %zu MB.\n",current_context_tokens.size(),current_savestate_size/(1024*1024)); printf("\nKV Save State: Created SaveState of %zu tokens, costing %zu MB.\n",current_context_tokens.size(),current_savestate_size/(1024*1024));
} }
return (res > 0); return res;
} }
return false; return 0;
} }
bool gpttype_load_state_kv() bool gpttype_load_state_kv()
{ {
@ -4364,10 +4378,10 @@ bool gpttype_load_state_kv()
} }
if(file_format == FileFormat::GGUF_GENERIC) if(file_format == FileFormat::GGUF_GENERIC)
{ {
if (current_savestate_ptr == nullptr || current_savestate_size == 0) { if (current_savestate_buffer.empty()) {
return false; return false;
} }
auto res = llama_state_set_data(llama_ctx_v4, current_savestate_ptr, current_savestate_size); auto res = llama_state_set_data(llama_ctx_v4, current_savestate_buffer.data(), current_savestate_size);
if(res > 0) if(res > 0)
{ {
current_context_tokens = savestate_context_tokens; current_context_tokens = savestate_context_tokens;
@ -4377,7 +4391,7 @@ bool gpttype_load_state_kv()
} }
return false; return false;
} }
bool gpttype_clear_state_kv() bool gpttype_clear_state_kv(bool shrink)
{ {
if(kcpp_data==nullptr) if(kcpp_data==nullptr)
{ {
@ -4385,11 +4399,13 @@ bool gpttype_clear_state_kv()
} }
if(file_format == FileFormat::GGUF_GENERIC) if(file_format == FileFormat::GGUF_GENERIC)
{ {
if (current_savestate_ptr != nullptr) { if (!current_savestate_buffer.empty()) {
//JIT free printf("\nKV Clear SaveState: Freed %zu MB.\n", current_savestate_size / (1024 * 1024));
printf("\nKV Clear SaveState: Freed %zu MB.\n",current_savestate_size/(1024*1024)); current_savestate_buffer.clear();
free(current_savestate_ptr); if(shrink)
current_savestate_ptr = nullptr; {
current_savestate_buffer.shrink_to_fit();
}
savestate_context_tokens.clear(); savestate_context_tokens.clear();
current_savestate_size = 0; current_savestate_size = 0;
return true; return true;

View file

@ -440,7 +440,7 @@
"info": { "info": {
"title": "KoboldCpp API", "title": "KoboldCpp API",
"description": "For swagger.json, <a href=\"?json=1\">click here</a> or use <a href=\"https://lite.koboldai.net/koboldcpp_api.json\">online version</a>.", "description": "For swagger.json, <a href=\"?json=1\">click here</a> or use <a href=\"https://lite.koboldai.net/koboldcpp_api.json\">online version</a>.",
"version": "2025.01.08" "version": "2025.06.03"
}, },
"openapi": "3.0.3", "openapi": "3.0.3",
"paths": { "paths": {
@ -639,7 +639,7 @@
"application/json": { "application/json": {
"example": { "example": {
"result": "KoboldCpp", "result": "KoboldCpp",
"version": "2025.01.08", "version": "2025.06.03",
"protected": false, "protected": false,
"txt2img": false, "txt2img": false,
"vision": false, "vision": false,
@ -1909,8 +1909,10 @@
"application/json": { "application/json": {
"example": { "example": {
"success": true, "success": true,
"old_state": 0, "old_state_size": 0,
"new_state": 0 "old_tokens": 0,
"new_state_size": 0,
"new_tokens": 0,
}, },
"schema": { "schema": {
"properties": { "properties": {
@ -1918,13 +1920,21 @@
"type": "boolean", "type": "boolean",
"description": "Whether the operation was successful." "description": "Whether the operation was successful."
}, },
"old_state": { "old_state_size": {
"type": "number", "type": "number",
"description": "Bytes currently in used for existing save state." "description": "Bytes currently in used for existing save state."
}, },
"new_state": { "old_tokens": {
"type": "number",
"description": "How many tokens in currently existing save state."
},
"new_state_size": {
"type": "number", "type": "number",
"description": "Bytes a new save state is estimated to consume." "description": "Bytes a new save state is estimated to consume."
},
"new_tokens": {
"type": "number",
"description": "How many tokens will be stored if a new save state is made."
} }
} }
} }
@ -1947,13 +1957,23 @@
"content": { "content": {
"application/json": { "application/json": {
"example": { "example": {
"success": true "success": true,
"new_state_size": 12345678,
"new_tokens": 100,
}, },
"schema": { "schema": {
"properties": { "properties": {
"success": { "success": {
"type": "boolean", "type": "boolean",
"description": "Whether the operation was successful." "description": "Whether the operation was successful."
},
"new_state_size": {
"type": "number",
"description": "Bytes a new save state is estimated to consume."
},
"new_tokens": {
"type": "number",
"description": "How many context tokens were saved in state."
} }
} }
} }
@ -1976,13 +1996,18 @@
"content": { "content": {
"application/json": { "application/json": {
"example": { "example": {
"success": true "success": true,
"new_tokens": 100
}, },
"schema": { "schema": {
"properties": { "properties": {
"success": { "success": {
"type": "boolean", "type": "boolean",
"description": "Whether the operation was successful." "description": "Whether the operation was successful."
},
"new_tokens": {
"type": "number",
"description": "How many context tokens were loaded from state."
} }
} }
} }
@ -2423,7 +2448,7 @@
"/v1/completions": { "/v1/completions": {
"post": { "post": {
"summary": "Generates text continuations given a prompt. Please refer to OpenAI documentation", "summary": "Generates text continuations given a prompt. Please refer to OpenAI documentation",
"description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions)", "description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions). All KoboldCpp samplers are supported, please refer to /api/v1/generate for more details.",
"requestBody": { "requestBody": {
"content": { "content": {
"application/json": { "application/json": {
@ -2445,7 +2470,7 @@
"/v1/chat/completions": { "/v1/chat/completions": {
"post": { "post": {
"summary": "Generates a response from a list of messages. Please refer to OpenAI documentation", "summary": "Generates a response from a list of messages. Please refer to OpenAI documentation",
"description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat)", "description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat). All KoboldCpp samplers are supported, please refer to /api/v1/generate for more details.",
"requestBody": { "requestBody": {
"content": { "content": {
"application/json": { "application/json": {

View file

@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
--> -->
<script> <script>
const LITEVER = 248; const LITEVER = 250;
const urlParams = new URLSearchParams(window.location.search); const urlParams = new URLSearchParams(window.location.search);
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_"; const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@ -2163,6 +2163,9 @@ Current version indicated by LITEVER below.
.color_offwhite { .color_offwhite {
color: #bedae9; color: #bedae9;
} }
.color_white {
color: #ffffff;
}
.color_darkgreen { .color_darkgreen {
color: #63975c; color: #63975c;
} }
@ -2999,6 +3002,8 @@ Current version indicated by LITEVER below.
const koboldcpp_tts_endpoint = "/api/extra/tts"; const koboldcpp_tts_endpoint = "/api/extra/tts";
const koboldcpp_admin_list_endpoint = "/api/admin/list_options"; const koboldcpp_admin_list_endpoint = "/api/admin/list_options";
const koboldcpp_admin_reload_endpoint = "/api/admin/reload_config"; const koboldcpp_admin_reload_endpoint = "/api/admin/reload_config";
const koboldcpp_admin_savestate_endpoint = "/api/admin/save_state";
const koboldcpp_admin_loadstate_endpoint = "/api/admin/load_state";
const koboldcpp_savedata_list_endpoint = "/api/extra/data/list"; const koboldcpp_savedata_list_endpoint = "/api/extra/data/list";
const koboldcpp_savedata_save_endpoint = "/api/extra/data/save"; const koboldcpp_savedata_save_endpoint = "/api/extra/data/save";
const koboldcpp_savedata_load_endpoint = "/api/extra/data/load"; const koboldcpp_savedata_load_endpoint = "/api/extra/data/load";
@ -7362,7 +7367,7 @@ Current version indicated by LITEVER below.
} }
} else { } else {
//check for tavernai fields //check for tavernai fields
let has_tav_wi_check = (new_loaded_storyobj && new_loaded_storyobj.entries && new_loaded_storyobj.entries["0"] && new_loaded_storyobj.entries["0"].hasOwnProperty("uid")); let has_tav_wi_check = has_tavern_wi_check(new_loaded_storyobj);
if (!new_loaded_storyobj.scenarioVersion && (new_loaded_storyobj.name != null || new_loaded_storyobj.description != null || if (!new_loaded_storyobj.scenarioVersion && (new_loaded_storyobj.name != null || new_loaded_storyobj.description != null ||
new_loaded_storyobj.personality != null || new_loaded_storyobj.spec=="chara_card_v2" || has_tav_wi_check)) { new_loaded_storyobj.personality != null || new_loaded_storyobj.spec=="chara_card_v2" || has_tav_wi_check)) {
load_tavern_obj(new_loaded_storyobj); load_tavern_obj(new_loaded_storyobj);
@ -7837,6 +7842,18 @@ Current version indicated by LITEVER below.
} }
}); });
} }
//Checks whether obj carries a Tavern-style world info table: an "entries" container
//whose first entry (by Object.keys order, whatever the key is named) owns a "uid" property.
//obj: any value, may be null/undefined.
//Returns a strict boolean: true only when such a first entry exists and has its own "uid".
function has_tavern_wi_check(obj)
{
    if (!obj || !obj.entries) {
        return false;
    }
    const keys = Object.keys(obj.entries);
    if (keys.length === 0) {
        return false;
    }
    const firstEntry = obj.entries[keys[0]];
    //Always yield true/false (never null/undefined/the entry itself), and go through
    //Object.prototype so entries without an inherited hasOwnProperty cannot throw.
    return !!firstEntry && Object.prototype.hasOwnProperty.call(firstEntry, "uid");
}
function load_agnai_wi(obj,chatopponent,myname) function load_agnai_wi(obj,chatopponent,myname)
{ {
@ -7930,7 +7947,7 @@ Current version indicated by LITEVER below.
function importLorebookAsTextDB(lorebook) function importLorebookAsTextDB(lorebook)
{ {
let has_tav_wi_check = (lorebook && lorebook.entries && lorebook.entries["0"] && lorebook.entries["0"].hasOwnProperty("uid")); let has_tav_wi_check = has_tavern_wi_check(lorebook);
if (lorebook && has_tav_wi_check) if (lorebook && has_tav_wi_check)
{ {
let lbname = lorebook.name?lorebook.name:"UntitledLorebook"; let lbname = lorebook.name?lorebook.name:"UntitledLorebook";
@ -8051,7 +8068,7 @@ Current version indicated by LITEVER below.
} }
let combinedmem = sysprompt + memory + scenario + examplemsg; let combinedmem = sysprompt + memory + scenario + examplemsg;
let agnaidatafieldsempty = scenario + examplemsg + (obj.personality?obj.personality:"") + greeting; let agnaidatafieldsempty = scenario + examplemsg + (obj.personality?obj.personality:"") + greeting;
let has_tav_wi_check = (obj && obj.entries && obj.entries["0"] && obj.entries["0"].hasOwnProperty("uid")); let has_tav_wi_check = has_tavern_wi_check(obj);
//check if it's a world info only card, if so, do not restart game //check if it's a world info only card, if so, do not restart game
if(combinedmem.trim()=="" && greeting=="" && has_tav_wi_check) if(combinedmem.trim()=="" && greeting=="" && has_tav_wi_check)
{ {
@ -8102,6 +8119,10 @@ Current version indicated by LITEVER below.
{ {
current_wi = load_agnai_wi(obj,chatopponent,myname); current_wi = load_agnai_wi(obj,chatopponent,myname);
} }
else if (has_tav_wi_check)
{
current_wi = load_tavern_wi(obj,chatopponent,myname);
}
} }
update_for_sidepanel(); update_for_sidepanel();
render_gametext(true); render_gametext(true);
@ -11074,6 +11095,7 @@ Current version indicated by LITEVER below.
function display_admin_container() function display_admin_container()
{ {
mainmenu_untab(false); mainmenu_untab(false);
document.getElementById("loadstatetxt").innerText = "";
let fetch_kcpps_configs = function(adminkey) let fetch_kcpps_configs = function(adminkey)
{ {
let header = {'Content-Type': 'application/json'}; let header = {'Content-Type': 'application/json'};
@ -11189,6 +11211,62 @@ Current version indicated by LITEVER below.
}); });
} }
//Sends a POST to the admin save_state endpoint and reports the outcome in the
//"loadstatetxt" element. A Bearer Authorization header is attached when
//last_admin_key is non-empty. On a response with success set, the saved token
//count and the state size in whole MB are shown; otherwise a failure text is shown.
//Request or JSON parsing errors are logged, shown as a failure, and raised in a msgbox.
function trigger_admin_savestate()
{
    const statustxt = document.getElementById("loadstatetxt");
    statustxt.innerText = "Saving State...";
    let header = {'Content-Type': 'application/json'};
    if(last_admin_key!="")
    {
        header['Authorization'] = 'Bearer ' + last_admin_key;
    }
    fetch(custom_kobold_endpoint + koboldcpp_admin_savestate_endpoint, {
        method: 'POST',
        headers: header
    })
    .then(x => x.json())
    .then(values => {
        console.log(values);
        if(values.success)
        {
            //Math.floor instead of parseInt: parseInt stringifies its argument first, so a
            //quotient printed in exponent notation (e.g. "9.5e-7") would be misread as 9.
            const sizemb = Math.floor(values.new_state_size/(1024*1024));
            statustxt.innerText = `State Saved (${values.new_tokens} tokens in ${sizemb} MB)`;
        }else{
            statustxt.innerText = `Save State Failed!`;
        }
    }).catch((error) => {
        console.log("Error: " + error);
        statustxt.innerText = `Save State Failed!`;
        msgbox(error,"Error");
    });
}
//Sends a POST to the admin load_state endpoint and mirrors progress and the result
//into the "loadstatetxt" element. A Bearer Authorization header is added when
//last_admin_key is non-empty. Failures while fetching or parsing are logged and
//also surfaced through msgbox.
function trigger_admin_loadstate()
{
    const set_status = (txt) => {
        document.getElementById("loadstatetxt").innerText = txt;
    };
    set_status("Loading State...");

    const reqheaders = {'Content-Type': 'application/json'};
    if(last_admin_key!="")
    {
        reqheaders['Authorization'] = 'Bearer ' + last_admin_key;
    }

    const requrl = custom_kobold_endpoint + koboldcpp_admin_loadstate_endpoint;
    fetch(requrl, { method: 'POST', headers: reqheaders })
    .then(resp => resp.json())
    .then(result => {
        console.log(result);
        set_status(result.success ? `State Loaded (${result.new_tokens} tokens)` : `Load State Failed!`);
    })
    .catch((err) => {
        console.log("Error: " + err);
        set_status(`Load State Failed!`);
        msgbox(err,"Error");
    });
}
var cachedsaveslotlabels = []; var cachedsaveslotlabels = [];
var netsaveslotlabels = []; var netsaveslotlabels = [];
function saveloadchangeslot(updatelist=false) function saveloadchangeslot(updatelist=false)
@ -22754,7 +22832,7 @@ Current version indicated by LITEVER below.
<div style="float:right;"> <div style="float:right;">
<div class="settinglabel"> <div class="settinglabel">
<button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="wi_group_export()">[Export / Import Group]</button> <button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="wi_group_export()">[Edit Group]</button>
<button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="export_wi_to_file()">[Export all WI to file]</button> <button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="export_wi_to_file()">[Export all WI to file]</button>
<button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="import_wi_from_file()">[Import all WI from file]</button> <button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="import_wi_from_file()">[Import all WI from file]</button>
</div> </div>
@ -24698,21 +24776,31 @@ Current version indicated by LITEVER below.
<div class="popupcontainer flex hidden" id="admincontainer"> <div class="popupcontainer flex hidden" id="admincontainer">
<div class="popupbg flex"></div> <div class="popupbg flex"></div>
<div class="nspopup flexsizevsmall"> <div class="nspopup flexsizesmall">
<div class="popuptitlebar"> <div class="popuptitlebar">
<div class="popuptitletext">Change Loaded KoboldCpp Config</div> <div class="popuptitletext">KoboldCpp Admin Config</div>
</div> </div>
<div class="menutext"> <br>
<b></b>Warning: This will terminate the current KoboldCpp instance and relaunch it with a new config.</b><br><br>
If an invalid configuration is selected, the new server may fail to relaunch!<br><br>
<div> <div>
<select title="Select New Config" style="padding:4px;" class="form-control" id="adminconfigdropdown"> <b class="color_white" style="padding: 5px;">Save / Load Context State:</b><br>
</select> <div style="display:flex;padding: 5px;">
<button type="button" style="margin:2px;width:50%" class="btn btn-primary" onclick="trigger_admin_savestate()">Save State</button>
<button type="button" style="margin:2px;width:50%" class="btn btn-primary" onclick="trigger_admin_loadstate()">Load State</button>
</div> </div>
<div class="menutext" id="loadstatetxt"></div>
</div>
<br>
<div>
<b class="color_white" style="padding: 5px;">Change Loaded Model / Config:</b><br>
<div style="display:flex;padding: 5px;">
<select title="Select New Config" style="padding:4px; width:calc(100% - 150px)" class="form-control" id="adminconfigdropdown">
</select>
<button type="button" style="margin-left:2px;width:146px" class="btn btn-primary" onclick="trigger_admin_reload()">Reload KoboldCpp</button>
</div>
<div class="menutext">Warning: This will terminate the current KoboldCpp instance and relaunch it with a new config. If an invalid configuration is selected, the new server may fail to relaunch!</div>
<br> <br>
</div> </div>
<div class="popupfooter"> <div class="popupfooter">
<button type="button" style="width:200px" class="btn btn-primary" onclick="trigger_admin_reload()">Reload KoboldCpp</button>
<button type="button" class="btn btn-primary" onclick="hide_popups()">Cancel</button> <button type="button" class="btn btn-primary" onclick="hide_popups()">Cancel</button>
</div> </div>
</div> </div>

View file

@ -523,7 +523,9 @@ def init_library():
handle.get_chat_template.restype = ctypes.c_char_p handle.get_chat_template.restype = ctypes.c_char_p
handle.calc_new_state_kv.restype = ctypes.c_size_t handle.calc_new_state_kv.restype = ctypes.c_size_t
handle.calc_old_state_kv.restype = ctypes.c_size_t handle.calc_old_state_kv.restype = ctypes.c_size_t
handle.save_state_kv.restype = ctypes.c_bool handle.calc_new_state_tokencount.restype = ctypes.c_size_t
handle.calc_old_state_tokencount.restype = ctypes.c_size_t
handle.save_state_kv.restype = ctypes.c_size_t
handle.load_state_kv.restype = ctypes.c_bool handle.load_state_kv.restype = ctypes.c_bool
handle.clear_state_kv.restype = ctypes.c_bool handle.clear_state_kv.restype = ctypes.c_bool
handle.sd_load_model.argtypes = [sd_load_model_inputs] handle.sd_load_model.argtypes = [sd_load_model_inputs]
@ -3090,7 +3092,7 @@ Change Mode<br>
elif self.path=="/v1": elif self.path=="/v1":
content_type = 'text/html' content_type = 'text/html'
response_body = ("KoboldCpp OpenAI compatible endpoint is running!\n\nFor usage reference, see https://platform.openai.com/docs/api-reference").encode() response_body = ("KoboldCpp OpenAI compatible endpoint is running!<br>For usage reference, see <a href='https://platform.openai.com/docs/api-reference'>https://platform.openai.com/docs/api-reference</a><br>For other endpoints, see <a href='/api'>KoboldCpp API Documentation</a>").encode()
elif self.path=="/api/extra/preloadstory": elif self.path=="/api/extra/preloadstory":
if preloaded_story is None: if preloaded_story is None:
@ -3457,32 +3459,6 @@ Change Mode<br>
resp = {"success": True} resp = {"success": True}
response_body = (json.dumps(resp).encode()) response_body = (json.dumps(resp).encode())
elif self.path.endswith('/api/admin/check_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
newstate = handle.calc_new_state_kv()
oldstate = handle.calc_old_state_kv()
response_body = (json.dumps({"success": True, "old_state":oldstate, "new_state":newstate}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/api/admin/load_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.load_state_kv()
response_body = (json.dumps({"success": result}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/api/admin/save_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.save_state_kv()
response_body = (json.dumps({"success": result}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/api/admin/clear_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.clear_state_kv()
response_body = (json.dumps({"success": result}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/set_tts_settings'): #return dummy response elif self.path.endswith('/set_tts_settings'): #return dummy response
response_body = (json.dumps({"message": "Settings successfully applied"}).encode()) response_body = (json.dumps({"message": "Settings successfully applied"}).encode())
@ -3532,33 +3508,58 @@ Change Mode<br>
if reqblocking: if reqblocking:
requestsinqueue = (requestsinqueue - 1) if requestsinqueue > 0 else 0 requestsinqueue = (requestsinqueue - 1) if requestsinqueue > 0 else 0
# handle endpoints that require mutex locking and handle actual gens
try: try:
sse_stream_flag = False sse_stream_flag = False
api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama,7=ollamachat api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama,7=ollamachat
is_imggen = False is_imggen = False
is_comfyui_imggen = False is_comfyui_imggen = False
is_transcribe = False is_transcribe = False
is_tts = False is_tts = False
is_embeddings = False is_embeddings = False
response_body = None
if self.path.endswith('/request'): if self.path.endswith('/api/admin/check_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
newstate = handle.calc_new_state_kv()
oldstate = handle.calc_old_state_kv()
newtokencnt = handle.calc_new_state_tokencount()
oldtokencnt = handle.calc_old_state_tokencount()
response_body = (json.dumps({"success": True, "old_state_size":oldstate, "old_tokens":oldtokencnt, "new_state_size":newstate, "new_tokens":newtokencnt}).encode())
else:
response_body = (json.dumps({"success": False, "old_state_size":0, "old_tokens":0, "new_state_size":0, "new_tokens":0}).encode())
elif self.path.endswith('/api/admin/load_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.load_state_kv()
tokencnt = handle.calc_new_state_tokencount()
response_body = (json.dumps({"success": result, "new_tokens":tokencnt}).encode())
else:
response_body = (json.dumps({"success": False, "new_tokens":0}).encode())
elif self.path.endswith('/api/admin/save_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.save_state_kv()
tokencnt = handle.calc_new_state_tokencount()
response_body = (json.dumps({"success": (result>0), "new_state_size":result, "new_tokens":tokencnt}).encode())
else:
response_body = (json.dumps({"success": False, "new_state_size":0, "new_tokens":0}).encode())
elif self.path.endswith('/api/admin/clear_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.clear_state_kv()
response_body = (json.dumps({"success": result}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/request'):
api_format = 1 api_format = 1
elif self.path.endswith(('/api/v1/generate', '/api/latest/generate')):
if self.path.endswith(('/api/v1/generate', '/api/latest/generate')):
api_format = 2 api_format = 2
elif self.path.endswith('/api/extra/generate/stream'):
if self.path.endswith('/api/extra/generate/stream'):
api_format = 2 api_format = 2
sse_stream_flag = True sse_stream_flag = True
elif self.path.endswith('/v1/completions') or self.path.endswith('/v1/completion'):
if self.path.endswith('/v1/completions') or self.path.endswith('/v1/completion'):
api_format = 3 api_format = 3
elif self.path.endswith('/v1/chat/completions'):
if self.path.endswith('/v1/chat/completions'):
api_format = 4 api_format = 4
elif self.path.endswith('/sdapi/v1/interrogate'):
if self.path.endswith('/sdapi/v1/interrogate'):
has_vision = (mmprojpath!="") has_vision = (mmprojpath!="")
if not has_vision: if not has_vision:
self.send_response(503) self.send_response(503)
@ -3569,27 +3570,27 @@ Change Mode<br>
}}).encode()) }}).encode())
return return
api_format = 5 api_format = 5
elif self.path.endswith('/api/generate'):
if self.path.endswith('/api/generate'):
api_format = 6 api_format = 6
if self.path.endswith('/api/chat'): elif self.path.endswith('/api/chat'):
api_format = 7 api_format = 7
elif self.path=="/prompt" or self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'):
if self.path=="/prompt" or self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'):
is_imggen = True is_imggen = True
if self.path=="/prompt": if self.path=="/prompt":
is_comfyui_imggen = True is_comfyui_imggen = True
elif self.path.endswith('/api/extra/transcribe') or self.path.endswith('/v1/audio/transcriptions'):
if self.path.endswith('/api/extra/transcribe') or self.path.endswith('/v1/audio/transcriptions'):
is_transcribe = True is_transcribe = True
elif self.path.endswith('/api/extra/tts') or self.path.endswith('/v1/audio/speech') or self.path.endswith('/tts_to_audio'):
if self.path.endswith('/api/extra/tts') or self.path.endswith('/v1/audio/speech') or self.path.endswith('/tts_to_audio'):
is_tts = True is_tts = True
elif self.path.endswith('/api/extra/embeddings') or self.path.endswith('/v1/embeddings'):
if self.path.endswith('/api/extra/embeddings') or self.path.endswith('/v1/embeddings'):
is_embeddings = True is_embeddings = True
if is_imggen or is_transcribe or is_tts or is_embeddings or api_format > 0: if response_body is not None:
self.send_response(response_code)
self.send_header('content-length', str(len(response_body)))
self.end_headers(content_type='application/json')
self.wfile.write(response_body)
elif is_imggen or is_transcribe or is_tts or is_embeddings or api_format > 0:
global last_req_time global last_req_time
last_req_time = time.time() last_req_time = time.time()

View file

@ -130,7 +130,9 @@ void ContextFastForward(std::vector<int> &current_context_tokens, std::vector<in
const bool useSmartContext, const bool requireFullSubset); const bool useSmartContext, const bool requireFullSubset);
size_t gpttype_calc_new_state_kv(); size_t gpttype_calc_new_state_kv();
size_t gpttype_calc_new_state_tokencount();
size_t gpttype_calc_old_state_kv(); size_t gpttype_calc_old_state_kv();
bool gpttype_save_state_kv(); size_t gpttype_calc_old_state_tokencount();
size_t gpttype_save_state_kv();
bool gpttype_load_state_kv(); bool gpttype_load_state_kv();
bool gpttype_clear_state_kv(); bool gpttype_clear_state_kv(bool shrink);