mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
horde workers pause themselves if recent local usage is detected (+1 squashed commits)
Squashed commits: [7ebb80bc] horde workers pause themselves if recent local usage is detected
This commit is contained in:
parent
81ac0e5656
commit
aa5124439d
1 changed file with 23 additions and 0 deletions
23
koboldcpp.py
23
koboldcpp.py
|
@@ -644,6 +644,7 @@ sslvalid = False
|
||||||
nocertify = False
|
nocertify = False
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
last_req_time = time.time()
|
last_req_time = time.time()
|
||||||
|
last_non_horde_req_time = time.time()
|
||||||
|
|
||||||
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
sys_version = ""
|
sys_version = ""
|
||||||
|
@@ -665,6 +666,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async def generate_text(self, genparams, api_format, stream_flag):
|
async def generate_text(self, genparams, api_format, stream_flag):
|
||||||
|
from datetime import datetime
|
||||||
global friendlymodelname, chatcompl_adapter
|
global friendlymodelname, chatcompl_adapter
|
||||||
is_quiet = args.quiet
|
is_quiet = args.quiet
|
||||||
def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat
|
def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat
|
||||||
|
@@ -749,6 +751,12 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
genparams["max_length"] = 32
|
genparams["max_length"] = 32
|
||||||
genparams["prompt"] = "### Instruction: In one sentence, write a descriptive caption for this image.\n### Response:"
|
genparams["prompt"] = "### Instruction: In one sentence, write a descriptive caption for this image.\n### Response:"
|
||||||
|
|
||||||
|
#flag instance as non-idle for a while
|
||||||
|
washordereq = genparams.get('genkey', '').startswith('HORDEREQ_')
|
||||||
|
if not washordereq:
|
||||||
|
global last_non_horde_req_time
|
||||||
|
last_non_horde_req_time = time.time()
|
||||||
|
|
||||||
return generate(
|
return generate(
|
||||||
prompt=genparams.get('prompt', ""),
|
prompt=genparams.get('prompt', ""),
|
||||||
memory=genparams.get('memory', ""),
|
memory=genparams.get('memory', ""),
|
||||||
|
@@ -792,6 +800,12 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
else:
|
else:
|
||||||
recvtxt = run_blocking()
|
recvtxt = run_blocking()
|
||||||
|
|
||||||
|
#flag instance as non-idle for a while
|
||||||
|
washordereq = genparams.get('genkey', '').startswith('HORDEREQ_')
|
||||||
|
if not washordereq:
|
||||||
|
global last_non_horde_req_time
|
||||||
|
last_non_horde_req_time = time.time()
|
||||||
|
|
||||||
if (args.debugmode != -1 and not is_quiet) or args.debugmode >= 1:
|
if (args.debugmode != -1 and not is_quiet) or args.debugmode >= 1:
|
||||||
utfprint("\nOutput: " + recvtxt)
|
utfprint("\nOutput: " + recvtxt)
|
||||||
|
|
||||||
|
@@ -2469,6 +2483,14 @@ def run_horde_worker(args, api_key, worker_name):
|
||||||
else:
|
else:
|
||||||
print_with_time(f"Horde Worker Exit limit reached, too many errors.")
|
print_with_time(f"Horde Worker Exit limit reached, too many errors.")
|
||||||
|
|
||||||
|
global last_non_horde_req_time
|
||||||
|
sec_since_non_horde = time.time() - last_non_horde_req_time
|
||||||
|
no_recent_local_usage = sec_since_non_horde>20
|
||||||
|
if not no_recent_local_usage:
|
||||||
|
#print_with_time(f"Recent Local Usage - Horde Worker Waiting...")
|
||||||
|
time.sleep(1)
|
||||||
|
continue
|
||||||
|
|
||||||
#first, make sure we are not generating
|
#first, make sure we are not generating
|
||||||
if modelbusy.locked():
|
if modelbusy.locked():
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
|
@@ -2516,6 +2538,7 @@ def run_horde_worker(args, api_key, worker_name):
|
||||||
currentjob_attempts += 1
|
currentjob_attempts += 1
|
||||||
if currentjob_attempts>5:
|
if currentjob_attempts>5:
|
||||||
break
|
break
|
||||||
|
|
||||||
print_with_time(f"Server Busy - Not ready to generate...")
|
print_with_time(f"Server Busy - Not ready to generate...")
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue