Use a static buffer for KV reloads instead. Also added to the Lite UI.

This commit is contained in:
Concedo 2025-06-03 22:32:46 +08:00
parent 4b57108508
commit 53f1511396
6 changed files with 239 additions and 99 deletions

View file

@ -523,7 +523,9 @@ def init_library():
handle.get_chat_template.restype = ctypes.c_char_p
handle.calc_new_state_kv.restype = ctypes.c_size_t
handle.calc_old_state_kv.restype = ctypes.c_size_t
handle.save_state_kv.restype = ctypes.c_bool
handle.calc_new_state_tokencount.restype = ctypes.c_size_t
handle.calc_old_state_tokencount.restype = ctypes.c_size_t
handle.save_state_kv.restype = ctypes.c_size_t
handle.load_state_kv.restype = ctypes.c_bool
handle.clear_state_kv.restype = ctypes.c_bool
handle.sd_load_model.argtypes = [sd_load_model_inputs]
@ -3090,7 +3092,7 @@ Change Mode<br>
elif self.path=="/v1":
content_type = 'text/html'
response_body = ("KoboldCpp OpenAI compatible endpoint is running!\n\nFor usage reference, see https://platform.openai.com/docs/api-reference").encode()
response_body = ("KoboldCpp OpenAI compatible endpoint is running!<br>For usage reference, see <a href='https://platform.openai.com/docs/api-reference'>https://platform.openai.com/docs/api-reference</a><br>For other endpoints, see <a href='/api'>KoboldCpp API Documentation</a>").encode()
elif self.path=="/api/extra/preloadstory":
if preloaded_story is None:
@ -3457,32 +3459,6 @@ Change Mode<br>
resp = {"success": True}
response_body = (json.dumps(resp).encode())
elif self.path.endswith('/api/admin/check_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
newstate = handle.calc_new_state_kv()
oldstate = handle.calc_old_state_kv()
response_body = (json.dumps({"success": True, "old_state":oldstate, "new_state":newstate}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/api/admin/load_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.load_state_kv()
response_body = (json.dumps({"success": result}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/api/admin/save_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.save_state_kv()
response_body = (json.dumps({"success": result}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/api/admin/clear_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.clear_state_kv()
response_body = (json.dumps({"success": result}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/set_tts_settings'): #return dummy response
response_body = (json.dumps({"message": "Settings successfully applied"}).encode())
@ -3532,33 +3508,58 @@ Change Mode<br>
if reqblocking:
requestsinqueue = (requestsinqueue - 1) if requestsinqueue > 0 else 0
# handle endpoints that require mutex locking and handle actual gens
try:
sse_stream_flag = False
api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama,7=ollamachat
is_imggen = False
is_comfyui_imggen = False
is_transcribe = False
is_tts = False
is_embeddings = False
response_body = None
if self.path.endswith('/request'):
if self.path.endswith('/api/admin/check_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
newstate = handle.calc_new_state_kv()
oldstate = handle.calc_old_state_kv()
newtokencnt = handle.calc_new_state_tokencount()
oldtokencnt = handle.calc_old_state_tokencount()
response_body = (json.dumps({"success": True, "old_state_size":oldstate, "old_tokens":oldtokencnt, "new_state_size":newstate, "new_tokens":newtokencnt}).encode())
else:
response_body = (json.dumps({"success": False, "old_state_size":0, "old_tokens":0, "new_state_size":0, "new_tokens":0}).encode())
elif self.path.endswith('/api/admin/load_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.load_state_kv()
tokencnt = handle.calc_new_state_tokencount()
response_body = (json.dumps({"success": result, "new_tokens":tokencnt}).encode())
else:
response_body = (json.dumps({"success": False, "new_tokens":0}).encode())
elif self.path.endswith('/api/admin/save_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.save_state_kv()
tokencnt = handle.calc_new_state_tokencount()
response_body = (json.dumps({"success": (result>0), "new_state_size":result, "new_tokens":tokencnt}).encode())
else:
response_body = (json.dumps({"success": False, "new_state_size":0, "new_tokens":0}).encode())
elif self.path.endswith('/api/admin/clear_state'):
if global_memory and args.admin and args.admindir and os.path.exists(args.admindir) and self.check_header_password(args.adminpassword):
result = handle.clear_state_kv()
response_body = (json.dumps({"success": result}).encode())
else:
response_body = (json.dumps({"success": False}).encode())
elif self.path.endswith('/request'):
api_format = 1
if self.path.endswith(('/api/v1/generate', '/api/latest/generate')):
elif self.path.endswith(('/api/v1/generate', '/api/latest/generate')):
api_format = 2
if self.path.endswith('/api/extra/generate/stream'):
elif self.path.endswith('/api/extra/generate/stream'):
api_format = 2
sse_stream_flag = True
if self.path.endswith('/v1/completions') or self.path.endswith('/v1/completion'):
elif self.path.endswith('/v1/completions') or self.path.endswith('/v1/completion'):
api_format = 3
if self.path.endswith('/v1/chat/completions'):
elif self.path.endswith('/v1/chat/completions'):
api_format = 4
if self.path.endswith('/sdapi/v1/interrogate'):
elif self.path.endswith('/sdapi/v1/interrogate'):
has_vision = (mmprojpath!="")
if not has_vision:
self.send_response(503)
@ -3569,27 +3570,27 @@ Change Mode<br>
}}).encode())
return
api_format = 5
if self.path.endswith('/api/generate'):
elif self.path.endswith('/api/generate'):
api_format = 6
if self.path.endswith('/api/chat'):
elif self.path.endswith('/api/chat'):
api_format = 7
if self.path=="/prompt" or self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'):
elif self.path=="/prompt" or self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'):
is_imggen = True
if self.path=="/prompt":
is_comfyui_imggen = True
if self.path.endswith('/api/extra/transcribe') or self.path.endswith('/v1/audio/transcriptions'):
elif self.path.endswith('/api/extra/transcribe') or self.path.endswith('/v1/audio/transcriptions'):
is_transcribe = True
if self.path.endswith('/api/extra/tts') or self.path.endswith('/v1/audio/speech') or self.path.endswith('/tts_to_audio'):
elif self.path.endswith('/api/extra/tts') or self.path.endswith('/v1/audio/speech') or self.path.endswith('/tts_to_audio'):
is_tts = True
if self.path.endswith('/api/extra/embeddings') or self.path.endswith('/v1/embeddings'):
elif self.path.endswith('/api/extra/embeddings') or self.path.endswith('/v1/embeddings'):
is_embeddings = True
if is_imggen or is_transcribe or is_tts or is_embeddings or api_format > 0:
if response_body is not None:
self.send_response(response_code)
self.send_header('content-length', str(len(response_body)))
self.end_headers(content_type='application/json')
self.wfile.write(response_body)
elif is_imggen or is_transcribe or is_tts or is_embeddings or api_format > 0:
global last_req_time
last_req_time = time.time()