mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
use a static buffer for kv reloads instead. also, added into lite ui
This commit is contained in:
parent
4b57108508
commit
53f1511396
6 changed files with 239 additions and 99 deletions
12
expose.cpp
12
expose.cpp
|
@ -376,11 +376,19 @@ extern "C"
|
|||
{
|
||||
return gpttype_calc_new_state_kv();
|
||||
}
|
||||
size_t calc_new_state_tokencount()
|
||||
{
|
||||
return gpttype_calc_new_state_tokencount();
|
||||
}
|
||||
size_t calc_old_state_kv() //returns how much memory current savestate is using
|
||||
{
|
||||
return gpttype_calc_old_state_kv();
|
||||
}
|
||||
bool save_state_kv() //triggers the save kv state of current ctx to memory
|
||||
size_t calc_old_state_tokencount()
|
||||
{
|
||||
return gpttype_calc_old_state_tokencount();
|
||||
}
|
||||
size_t save_state_kv() //triggers the save kv state of current ctx to memory
|
||||
{
|
||||
return gpttype_save_state_kv();
|
||||
}
|
||||
|
@ -390,6 +398,6 @@ extern "C"
|
|||
}
|
||||
bool clear_state_kv()
|
||||
{
|
||||
return gpttype_clear_state_kv();
|
||||
return gpttype_clear_state_kv(true);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -143,7 +143,7 @@ std::deque<std::string> delayed_generated_tokens; //for use with antislop sampli
|
|||
static std::map<int,std::vector<int>> antislop_banned_token_ids; //first is the npast position, second is the array of banned ids at that index
|
||||
|
||||
static size_t current_savestate_size = 0;
|
||||
uint8_t * current_savestate_ptr = nullptr;
|
||||
static std::vector<uint8_t> current_savestate_buffer;
|
||||
static std::vector<gpt_vocab::id> savestate_context_tokens; //for context clones
|
||||
|
||||
inline int kcpp_cpu_has_blas(void) {
|
||||
|
@ -4331,30 +4331,44 @@ size_t gpttype_calc_old_state_kv()
|
|||
{
|
||||
return current_savestate_size;
|
||||
}
|
||||
bool gpttype_save_state_kv()
|
||||
size_t gpttype_calc_old_state_tokencount()
|
||||
{
|
||||
return savestate_context_tokens.size();
|
||||
}
|
||||
size_t gpttype_calc_new_state_tokencount()
|
||||
{
|
||||
return current_context_tokens.size();
|
||||
}
|
||||
size_t gpttype_save_state_kv()
|
||||
{
|
||||
if(kcpp_data==nullptr)
|
||||
{
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
if(file_format == FileFormat::GGUF_GENERIC)
|
||||
{
|
||||
gpttype_clear_state_kv(); //JIT free
|
||||
gpttype_clear_state_kv(false); //JIT free
|
||||
size_t newsize = llama_state_get_size(llama_ctx_v4);
|
||||
current_savestate_ptr = (uint8_t *) malloc(newsize + 512); //add some padding
|
||||
if(!current_savestate_ptr)
|
||||
{
|
||||
return false;
|
||||
try {
|
||||
if (current_savestate_buffer.capacity() < newsize + 512) {
|
||||
current_savestate_buffer = std::vector<uint8_t>(newsize + 512);
|
||||
} else {
|
||||
current_savestate_buffer.resize(newsize + 512);
|
||||
}
|
||||
auto res = llama_state_get_data(llama_ctx_v4, current_savestate_ptr, newsize);
|
||||
current_savestate_buffer.resize(newsize + 512); // add some padding. May throw std::bad_alloc
|
||||
} catch (const std::bad_alloc&) {
|
||||
fprintf(stderr, "KV Save State: Failed to allocate %zu bytes.\n", newsize + 512);
|
||||
return 0;
|
||||
}
|
||||
auto res = llama_state_get_data(llama_ctx_v4, current_savestate_buffer.data(), newsize);
|
||||
if (res > 0) {
|
||||
current_savestate_size = newsize;
|
||||
savestate_context_tokens = current_context_tokens;
|
||||
printf("\nKV Save State: Created SaveState of %zu tokens, costing %zu MB.\n",current_context_tokens.size(),current_savestate_size/(1024*1024));
|
||||
}
|
||||
return (res > 0);
|
||||
return res;
|
||||
}
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
bool gpttype_load_state_kv()
|
||||
{
|
||||
|
@ -4364,10 +4378,10 @@ bool gpttype_load_state_kv()
|
|||
}
|
||||
if(file_format == FileFormat::GGUF_GENERIC)
|
||||
{
|
||||
if (current_savestate_ptr == nullptr || current_savestate_size == 0) {
|
||||
if (current_savestate_buffer.empty()) {
|
||||
return false;
|
||||
}
|
||||
auto res = llama_state_set_data(llama_ctx_v4, current_savestate_ptr, current_savestate_size);
|
||||
auto res = llama_state_set_data(llama_ctx_v4, current_savestate_buffer.data(), current_savestate_size);
|
||||
if(res > 0)
|
||||
{
|
||||
current_context_tokens = savestate_context_tokens;
|
||||
|
@ -4377,7 +4391,7 @@ bool gpttype_load_state_kv()
|
|||
}
|
||||
return false;
|
||||
}
|
||||
bool gpttype_clear_state_kv()
|
||||
bool gpttype_clear_state_kv(bool shrink)
|
||||
{
|
||||
if(kcpp_data==nullptr)
|
||||
{
|
||||
|
@ -4385,11 +4399,13 @@ bool gpttype_clear_state_kv()
|
|||
}
|
||||
if(file_format == FileFormat::GGUF_GENERIC)
|
||||
{
|
||||
if (current_savestate_ptr != nullptr) {
|
||||
//JIT free
|
||||
if (!current_savestate_buffer.empty()) {
|
||||
printf("\nKV Clear SaveState: Freed %zu MB.\n", current_savestate_size / (1024 * 1024));
|
||||
free(current_savestate_ptr);
|
||||
current_savestate_ptr = nullptr;
|
||||
current_savestate_buffer.clear();
|
||||
if(shrink)
|
||||
{
|
||||
current_savestate_buffer.shrink_to_fit();
|
||||
}
|
||||
savestate_context_tokens.clear();
|
||||
current_savestate_size = 0;
|
||||
return true;
|
||||
|
|
|
@ -440,7 +440,7 @@
|
|||
"info": {
|
||||
"title": "KoboldCpp API",
|
||||
"description": "For swagger.json, <a href=\"?json=1\">click here</a> or use <a href=\"https://lite.koboldai.net/koboldcpp_api.json\">online version</a>.",
|
||||
"version": "2025.01.08"
|
||||
"version": "2025.06.03"
|
||||
},
|
||||
"openapi": "3.0.3",
|
||||
"paths": {
|
||||
|
@ -639,7 +639,7 @@
|
|||
"application/json": {
|
||||
"example": {
|
||||
"result": "KoboldCpp",
|
||||
"version": "2025.01.08",
|
||||
"version": "2025.06.03",
|
||||
"protected": false,
|
||||
"txt2img": false,
|
||||
"vision": false,
|
||||
|
@ -1909,8 +1909,10 @@
|
|||
"application/json": {
|
||||
"example": {
|
||||
"success": true,
|
||||
"old_state": 0,
|
||||
"new_state": 0
|
||||
"old_state_size": 0,
|
||||
"old_tokens": 0,
|
||||
"new_state_size": 0,
|
||||
"new_tokens": 0,
|
||||
},
|
||||
"schema": {
|
||||
"properties": {
|
||||
|
@ -1918,13 +1920,21 @@
|
|||
"type": "boolean",
|
||||
"description": "Whether the operation was successful."
|
||||
},
|
||||
"old_state": {
|
||||
"old_state_size": {
|
||||
"type": "number",
|
||||
"description": "Bytes currently in use for existing save state."
|
||||
},
|
||||
"new_state": {
|
||||
"old_tokens": {
|
||||
"type": "number",
|
||||
"description": "How many tokens in currently existing save state."
|
||||
},
|
||||
"new_state_size": {
|
||||
"type": "number",
|
||||
"description": "Bytes a new save state is estimated to consume."
|
||||
},
|
||||
"new_tokens": {
|
||||
"type": "number",
|
||||
"description": "How many tokens will be stored if a new save state is made."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1947,13 +1957,23 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"example": {
|
||||
"success": true
|
||||
"success": true,
|
||||
"new_state_size": 12345678,
|
||||
"new_tokens": 100,
|
||||
},
|
||||
"schema": {
|
||||
"properties": {
|
||||
"success": {
|
||||
"type": "boolean",
|
||||
"description": "Whether the operation was successful."
|
||||
},
|
||||
"new_state_size": {
|
||||
"type": "number",
|
||||
"description": "Bytes consumed by the newly created save state."
|
||||
},
|
||||
"new_tokens": {
|
||||
"type": "number",
|
||||
"description": "How many context tokens were saved in state."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1976,13 +1996,18 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"example": {
|
||||
"success": true
|
||||
"success": true,
|
||||
"new_tokens": 100
|
||||
},
|
||||
"schema": {
|
||||
"properties": {
|
||||
"success": {
|
||||
"type": "boolean",
|
||||
"description": "Whether the operation was successful."
|
||||
},
|
||||
"new_tokens": {
|
||||
"type": "number",
|
||||
"description": "How many context tokens were loaded from state."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2423,7 +2448,7 @@
|
|||
"/v1/completions": {
|
||||
"post": {
|
||||
"summary": "Generates text continuations given a prompt. Please refer to OpenAI documentation",
|
||||
"description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions)",
|
||||
"description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions). All KoboldCpp samplers are supported, please refer to /api/v1/generate for more details.",
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
|
@ -2445,7 +2470,7 @@
|
|||
"/v1/chat/completions": {
|
||||
"post": {
|
||||
"summary": "Generates a response from a list of messages. Please refer to OpenAI documentation",
|
||||
"description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat)",
|
||||
"description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat). All KoboldCpp samplers are supported, please refer to /api/v1/generate for more details.",
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
|
|
114
klite.embd
114
klite.embd
|
@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
|
|||
-->
|
||||
|
||||
<script>
|
||||
const LITEVER = 248;
|
||||
const LITEVER = 250;
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
|
||||
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
|
||||
|
@ -2163,6 +2163,9 @@ Current version indicated by LITEVER below.
|
|||
.color_offwhite {
|
||||
color: #bedae9;
|
||||
}
|
||||
.color_white {
|
||||
color: #ffffff;
|
||||
}
|
||||
.color_darkgreen {
|
||||
color: #63975c;
|
||||
}
|
||||
|
@ -2999,6 +3002,8 @@ Current version indicated by LITEVER below.
|
|||
const koboldcpp_tts_endpoint = "/api/extra/tts";
|
||||
const koboldcpp_admin_list_endpoint = "/api/admin/list_options";
|
||||
const koboldcpp_admin_reload_endpoint = "/api/admin/reload_config";
|
||||
const koboldcpp_admin_savestate_endpoint = "/api/admin/save_state";
|
||||
const koboldcpp_admin_loadstate_endpoint = "/api/admin/load_state";
|
||||
const koboldcpp_savedata_list_endpoint = "/api/extra/data/list";
|
||||
const koboldcpp_savedata_save_endpoint = "/api/extra/data/save";
|
||||
const koboldcpp_savedata_load_endpoint = "/api/extra/data/load";
|
||||
|
@ -7362,7 +7367,7 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
} else {
|
||||
//check for tavernai fields
|
||||
let has_tav_wi_check = (new_loaded_storyobj && new_loaded_storyobj.entries && new_loaded_storyobj.entries["0"] && new_loaded_storyobj.entries["0"].hasOwnProperty("uid"));
|
||||
let has_tav_wi_check = has_tavern_wi_check(new_loaded_storyobj);
|
||||
if (!new_loaded_storyobj.scenarioVersion && (new_loaded_storyobj.name != null || new_loaded_storyobj.description != null ||
|
||||
new_loaded_storyobj.personality != null || new_loaded_storyobj.spec=="chara_card_v2" || has_tav_wi_check)) {
|
||||
load_tavern_obj(new_loaded_storyobj);
|
||||
|
@ -7837,6 +7842,18 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
});
|
||||
}
|
||||
function has_tavern_wi_check(obj)
|
||||
{
|
||||
let checkresult = false;
|
||||
if (obj && obj.entries) {
|
||||
const keys = Object.keys(obj.entries);
|
||||
if (keys.length > 0) {
|
||||
const firstEntry = obj.entries[keys[0]];
|
||||
checkresult = firstEntry && firstEntry.hasOwnProperty("uid");
|
||||
}
|
||||
}
|
||||
return checkresult;
|
||||
}
|
||||
|
||||
function load_agnai_wi(obj,chatopponent,myname)
|
||||
{
|
||||
|
@ -7930,7 +7947,7 @@ Current version indicated by LITEVER below.
|
|||
|
||||
function importLorebookAsTextDB(lorebook)
|
||||
{
|
||||
let has_tav_wi_check = (lorebook && lorebook.entries && lorebook.entries["0"] && lorebook.entries["0"].hasOwnProperty("uid"));
|
||||
let has_tav_wi_check = has_tavern_wi_check(lorebook);
|
||||
if (lorebook && has_tav_wi_check)
|
||||
{
|
||||
let lbname = lorebook.name?lorebook.name:"UntitledLorebook";
|
||||
|
@ -8051,7 +8068,7 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
let combinedmem = sysprompt + memory + scenario + examplemsg;
|
||||
let agnaidatafieldsempty = scenario + examplemsg + (obj.personality?obj.personality:"") + greeting;
|
||||
let has_tav_wi_check = (obj && obj.entries && obj.entries["0"] && obj.entries["0"].hasOwnProperty("uid"));
|
||||
let has_tav_wi_check = has_tavern_wi_check(obj);
|
||||
//check if it's a world info only card, if so, do not restart game
|
||||
if(combinedmem.trim()=="" && greeting=="" && has_tav_wi_check)
|
||||
{
|
||||
|
@ -8102,6 +8119,10 @@ Current version indicated by LITEVER below.
|
|||
{
|
||||
current_wi = load_agnai_wi(obj,chatopponent,myname);
|
||||
}
|
||||
else if (has_tav_wi_check)
|
||||
{
|
||||
current_wi = load_tavern_wi(obj,chatopponent,myname);
|
||||
}
|
||||
}
|
||||
update_for_sidepanel();
|
||||
render_gametext(true);
|
||||
|
@ -11074,6 +11095,7 @@ Current version indicated by LITEVER below.
|
|||
function display_admin_container()
|
||||
{
|
||||
mainmenu_untab(false);
|
||||
document.getElementById("loadstatetxt").innerText = "";
|
||||
let fetch_kcpps_configs = function(adminkey)
|
||||
{
|
||||
let header = {'Content-Type': 'application/json'};
|
||||
|
@ -11189,6 +11211,62 @@ Current version indicated by LITEVER below.
|
|||
});
|
||||
}
|
||||
|
||||
function trigger_admin_savestate()
|
||||
{
|
||||
document.getElementById("loadstatetxt").innerText = "Saving State...";
|
||||
let header = {'Content-Type': 'application/json'};
|
||||
if(last_admin_key!="")
|
||||
{
|
||||
header['Authorization'] = 'Bearer ' + last_admin_key;
|
||||
}
|
||||
fetch(custom_kobold_endpoint + koboldcpp_admin_savestate_endpoint, {
|
||||
method: 'POST',
|
||||
headers: header
|
||||
})
|
||||
.then(x => x.json())
|
||||
.then(values => {
|
||||
console.log(values);
|
||||
if(values.success)
|
||||
{
|
||||
document.getElementById("loadstatetxt").innerText = `State Saved (${values.new_tokens} tokens in ${parseInt(values.new_state_size/(1024*1024))} MB)`;
|
||||
}else{
|
||||
document.getElementById("loadstatetxt").innerText = `Save State Failed!`;
|
||||
}
|
||||
}).catch((error) => {
|
||||
console.log("Error: " + error);
|
||||
document.getElementById("loadstatetxt").innerText = `Save State Failed!`;
|
||||
msgbox(error,"Error");
|
||||
});
|
||||
}
|
||||
|
||||
function trigger_admin_loadstate()
|
||||
{
|
||||
document.getElementById("loadstatetxt").innerText = "Loading State...";
|
||||
let header = {'Content-Type': 'application/json'};
|
||||
if(last_admin_key!="")
|
||||
{
|
||||
header['Authorization'] = 'Bearer ' + last_admin_key;
|
||||
}
|
||||
fetch(custom_kobold_endpoint + koboldcpp_admin_loadstate_endpoint, {
|
||||
method: 'POST',
|
||||
headers: header
|
||||
})
|
||||
.then(x => x.json())
|
||||
.then(values => {
|
||||
console.log(values);
|
||||
if(values.success)
|
||||
{
|
||||
document.getElementById("loadstatetxt").innerText = `State Loaded (${values.new_tokens} tokens)`;
|
||||
}else{
|
||||
document.getElementById("loadstatetxt").innerText = `Load State Failed!`;
|
||||
}
|
||||
}).catch((error) => {
|
||||
console.log("Error: " + error);
|
||||
document.getElementById("loadstatetxt").innerText = `Load State Failed!`;
|
||||
msgbox(error,"Error");
|
||||
});
|
||||
}
|
||||
|
||||
var cachedsaveslotlabels = [];
|
||||
var netsaveslotlabels = [];
|
||||
function saveloadchangeslot(updatelist=false)
|
||||
|
@ -22754,7 +22832,7 @@ Current version indicated by LITEVER below.
|
|||
|
||||
<div style="float:right;">
|
||||
<div class="settinglabel">
|
||||
<button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="wi_group_export()">[Export / Import Group]</button>
|
||||
<button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="wi_group_export()">[Edit Group]</button>
|
||||
<button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="export_wi_to_file()">[Export all WI to file]</button>
|
||||
<button type="button" class="btn purplebtn widelbtn" style="padding:4px;margin:2px;margin-top:4px;margin-bottom:4px;font-size:8px" id="wiexport" onclick="import_wi_from_file()">[Import all WI from file]</button>
|
||||
</div>
|
||||
|
@ -24698,21 +24776,31 @@ Current version indicated by LITEVER below.
|
|||
|
||||
<div class="popupcontainer flex hidden" id="admincontainer">
|
||||
<div class="popupbg flex"></div>
|
||||
<div class="nspopup flexsizevsmall">
|
||||
<div class="nspopup flexsizesmall">
|
||||
<div class="popuptitlebar">
|
||||
<div class="popuptitletext">Change Loaded KoboldCpp Config</div>
|
||||
<div class="popuptitletext">KoboldCpp Admin Config</div>
|
||||
</div>
|
||||
<div class="menutext">
|
||||
<b></b>Warning: This will terminate the current KoboldCpp instance and relaunch it with a new config.</b><br><br>
|
||||
If an invalid configuration is selected, the new server may fail to relaunch!<br><br>
|
||||
<br>
|
||||
<div>
|
||||
<select title="Select New Config" style="padding:4px;" class="form-control" id="adminconfigdropdown">
|
||||
</select>
|
||||
<b class="color_white" style="padding: 5px;">Save / Load Context State:</b><br>
|
||||
<div style="display:flex;padding: 5px;">
|
||||
<button type="button" style="margin:2px;width:50%" class="btn btn-primary" onclick="trigger_admin_savestate()">Save State</button>
|
||||
<button type="button" style="margin:2px;width:50%" class="btn btn-primary" onclick="trigger_admin_loadstate()">Load State</button>
|
||||
</div>
|
||||
<div class="menutext" id="loadstatetxt"></div>
|
||||
</div>
|
||||
<br>
|
||||
<div>
|
||||
<b class="color_white" style="padding: 5px;">Change Loaded Model / Config:</b><br>
|
||||
<div style="display:flex;padding: 5px;">
|
||||
<select title="Select New Config" style="padding:4px; width:calc(100% - 150px)" class="form-control" id="adminconfigdropdown">
|
||||
</select>
|
||||
<button type="button" style="margin-left:2px;width:146px" class="btn btn-primary" onclick="trigger_admin_reload()">Reload KoboldCpp</button>
|
||||
</div>
|
||||
<div class="menutext">Warning: This will terminate the current KoboldCpp instance and relaunch it with a new config. If an invalid configuration is selected, the new server may fail to relaunch!</div>
|
||||
<br>
|
||||
</div>
|
||||
<div class="popupfooter">
|
||||
<button type="button" style="width:200px" class="btn btn-primary" onclick="trigger_admin_reload()">Reload KoboldCpp</button>
|
||||
<button type="button" class="btn btn-primary" onclick="hide_popups()">Cancel</button>
|
||||
</div>
|
||||
</div>
|
||||
|
|
105
koboldcpp.py
105
koboldcpp.py
|
@ -523,7 +523,9 @@ def init_library():
|
|||
handle.get_chat_template.restype = ctypes.c_char_p
|
||||
handle.calc_new_state_kv.restype = ctypes.c_size_t
|
||||
handle.calc_old_state_kv.restype = ctypes.c_size_t
|
||||
handle.save_state_kv.restype = ctypes.c_bool
|
||||
handle.calc_new_state_tokencount.restype = ctypes.c_size_t
|
||||
handle.calc_old_state_tokencount.restype = ctypes.c_size_t
|
||||
handle.save_state_kv.restype = ctypes.c_size_t
|
||||
handle.load_state_kv.restype = ctypes.c_bool
|
||||
handle.clear_state_kv.restype = ctypes.c_bool
|
||||
handle.sd_load_model.argtypes = [sd_load_model_inputs]
|
||||
|
@ -3090,7 +3092,7 @@ Change Mode<br>
|
|||
|
||||
elif self.path=="/v1":
|
||||
content_type = 'text/html'
|
||||
response_body = ("KoboldCpp OpenAI compatible endpoint is running!\n\nFor usage reference, see https://platform.openai.com/docs/api-reference").encode()
|
||||
response_body = ("KoboldCpp OpenAI compatible endpoint is running!<br>For usage reference, see <a href='https://platform.openai.com/docs/api-reference'>https://platform.openai.com/docs/api-reference</a><br>For other endpoints, see <a href='/api'>KoboldCpp API Documentation</a>").encode()
|
||||
|
||||
elif self.path=="/api/extra/preloadstory":
|
||||
if preloaded_story is None:
|
||||
|
@ -3457,32 +3459,6 @@ Change Mode<br>
|
|||
resp = {"success": True}
|
||||
response_body = (json.dumps(resp).encode())
|
||||
|
||||
elif self.path.endswith('/api/admin/check_state'):
|
||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||
newstate = handle.calc_new_state_kv()
|
||||
oldstate = handle.calc_old_state_kv()
|
||||
response_body = (json.dumps({"success": True, "old_state":oldstate, "new_state":newstate}).encode())
|
||||
else:
|
||||
response_body = (json.dumps({"success": False}).encode())
|
||||
elif self.path.endswith('/api/admin/load_state'):
|
||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||
result = handle.load_state_kv()
|
||||
response_body = (json.dumps({"success": result}).encode())
|
||||
else:
|
||||
response_body = (json.dumps({"success": False}).encode())
|
||||
elif self.path.endswith('/api/admin/save_state'):
|
||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||
result = handle.save_state_kv()
|
||||
response_body = (json.dumps({"success": result}).encode())
|
||||
else:
|
||||
response_body = (json.dumps({"success": False}).encode())
|
||||
elif self.path.endswith('/api/admin/clear_state'):
|
||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||
result = handle.clear_state_kv()
|
||||
response_body = (json.dumps({"success": result}).encode())
|
||||
else:
|
||||
response_body = (json.dumps({"success": False}).encode())
|
||||
|
||||
elif self.path.endswith('/set_tts_settings'): #return dummy response
|
||||
response_body = (json.dumps({"message": "Settings successfully applied"}).encode())
|
||||
|
||||
|
@ -3532,33 +3508,58 @@ Change Mode<br>
|
|||
if reqblocking:
|
||||
requestsinqueue = (requestsinqueue - 1) if requestsinqueue > 0 else 0
|
||||
|
||||
# handle endpoints that require mutex locking and handle actual gens
|
||||
try:
|
||||
sse_stream_flag = False
|
||||
|
||||
api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama,7=ollamachat
|
||||
is_imggen = False
|
||||
is_comfyui_imggen = False
|
||||
is_transcribe = False
|
||||
is_tts = False
|
||||
is_embeddings = False
|
||||
response_body = None
|
||||
|
||||
if self.path.endswith('/request'):
|
||||
if self.path.endswith('/api/admin/check_state'):
|
||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||
newstate = handle.calc_new_state_kv()
|
||||
oldstate = handle.calc_old_state_kv()
|
||||
newtokencnt = handle.calc_new_state_tokencount()
|
||||
oldtokencnt = handle.calc_old_state_tokencount()
|
||||
response_body = (json.dumps({"success": True, "old_state_size":oldstate, "old_tokens":oldtokencnt, "new_state_size":newstate, "new_tokens":newtokencnt}).encode())
|
||||
else:
|
||||
response_body = (json.dumps({"success": False, "old_state_size":0, "old_tokens":0, "new_state_size":0, "new_tokens":0}).encode())
|
||||
elif self.path.endswith('/api/admin/load_state'):
|
||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||
result = handle.load_state_kv()
|
||||
tokencnt = handle.calc_new_state_tokencount()
|
||||
response_body = (json.dumps({"success": result, "new_tokens":tokencnt}).encode())
|
||||
else:
|
||||
response_body = (json.dumps({"success": False, "new_tokens":0}).encode())
|
||||
elif self.path.endswith('/api/admin/save_state'):
|
||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||
result = handle.save_state_kv()
|
||||
tokencnt = handle.calc_new_state_tokencount()
|
||||
response_body = (json.dumps({"success": (result>0), "new_state_size":result, "new_tokens":tokencnt}).encode())
|
||||
else:
|
||||
response_body = (json.dumps({"success": False, "new_state_size":0, "new_tokens":0}).encode())
|
||||
elif self.path.endswith('/api/admin/clear_state'):
|
||||
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
|
||||
result = handle.clear_state_kv()
|
||||
response_body = (json.dumps({"success": result}).encode())
|
||||
else:
|
||||
response_body = (json.dumps({"success": False}).encode())
|
||||
elif self.path.endswith('/request'):
|
||||
api_format = 1
|
||||
|
||||
if self.path.endswith(('/api/v1/generate', '/api/latest/generate')):
|
||||
elif self.path.endswith(('/api/v1/generate', '/api/latest/generate')):
|
||||
api_format = 2
|
||||
|
||||
if self.path.endswith('/api/extra/generate/stream'):
|
||||
elif self.path.endswith('/api/extra/generate/stream'):
|
||||
api_format = 2
|
||||
sse_stream_flag = True
|
||||
|
||||
if self.path.endswith('/v1/completions') or self.path.endswith('/v1/completion'):
|
||||
elif self.path.endswith('/v1/completions') or self.path.endswith('/v1/completion'):
|
||||
api_format = 3
|
||||
|
||||
if self.path.endswith('/v1/chat/completions'):
|
||||
elif self.path.endswith('/v1/chat/completions'):
|
||||
api_format = 4
|
||||
|
||||
if self.path.endswith('/sdapi/v1/interrogate'):
|
||||
elif self.path.endswith('/sdapi/v1/interrogate'):
|
||||
has_vision = (mmprojpath!="")
|
||||
if not has_vision:
|
||||
self.send_response(503)
|
||||
|
@ -3569,27 +3570,27 @@ Change Mode<br>
|
|||
}}).encode())
|
||||
return
|
||||
api_format = 5
|
||||
|
||||
if self.path.endswith('/api/generate'):
|
||||
elif self.path.endswith('/api/generate'):
|
||||
api_format = 6
|
||||
if self.path.endswith('/api/chat'):
|
||||
elif self.path.endswith('/api/chat'):
|
||||
api_format = 7
|
||||
|
||||
if self.path=="/prompt" or self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'):
|
||||
elif self.path=="/prompt" or self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'):
|
||||
is_imggen = True
|
||||
if self.path=="/prompt":
|
||||
is_comfyui_imggen = True
|
||||
|
||||
if self.path.endswith('/api/extra/transcribe') or self.path.endswith('/v1/audio/transcriptions'):
|
||||
elif self.path.endswith('/api/extra/transcribe') or self.path.endswith('/v1/audio/transcriptions'):
|
||||
is_transcribe = True
|
||||
|
||||
if self.path.endswith('/api/extra/tts') or self.path.endswith('/v1/audio/speech') or self.path.endswith('/tts_to_audio'):
|
||||
elif self.path.endswith('/api/extra/tts') or self.path.endswith('/v1/audio/speech') or self.path.endswith('/tts_to_audio'):
|
||||
is_tts = True
|
||||
|
||||
if self.path.endswith('/api/extra/embeddings') or self.path.endswith('/v1/embeddings'):
|
||||
elif self.path.endswith('/api/extra/embeddings') or self.path.endswith('/v1/embeddings'):
|
||||
is_embeddings = True
|
||||
|
||||
if is_imggen or is_transcribe or is_tts or is_embeddings or api_format > 0:
|
||||
if response_body is not None:
|
||||
self.send_response(response_code)
|
||||
self.send_header('content-length', str(len(response_body)))
|
||||
self.end_headers(content_type='application/json')
|
||||
self.wfile.write(response_body)
|
||||
elif is_imggen or is_transcribe or is_tts or is_embeddings or api_format > 0:
|
||||
global last_req_time
|
||||
last_req_time = time.time()
|
||||
|
||||
|
|
|
@ -130,7 +130,9 @@ void ContextFastForward(std::vector<int> &current_context_tokens, std::vector<in
|
|||
const bool useSmartContext, const bool requireFullSubset);
|
||||
|
||||
size_t gpttype_calc_new_state_kv();
|
||||
size_t gpttype_calc_new_state_tokencount();
|
||||
size_t gpttype_calc_old_state_kv();
|
||||
bool gpttype_save_state_kv();
|
||||
size_t gpttype_calc_old_state_tokencount();
|
||||
size_t gpttype_save_state_kv();
|
||||
bool gpttype_load_state_kv();
|
||||
bool gpttype_clear_state_kv();
|
||||
bool gpttype_clear_state_kv(bool shrink);
|
Loading…
Add table
Add a link
Reference in a new issue