mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
better abort handling, added support for dynatemp exponent
This commit is contained in:
parent
f96f29be7b
commit
08236ccc97
7 changed files with 110 additions and 50 deletions
22
koboldcpp.py
22
koboldcpp.py
|
@ -79,6 +79,7 @@ class generation_inputs(ctypes.Structure):
|
|||
("grammar_retain_state", ctypes.c_bool),
|
||||
("quiet", ctypes.c_bool),
|
||||
("dynatemp_range", ctypes.c_float),
|
||||
("dynatemp_exponent", ctypes.c_float),
|
||||
("logit_biases", logit_bias * logit_bias_max)]
|
||||
|
||||
class generation_outputs(ctypes.Structure):
|
||||
|
@ -311,7 +312,7 @@ def load_model(model_filename):
|
|||
ret = handle.load_model(inputs)
|
||||
return ret
|
||||
|
||||
def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, logit_biases={}):
|
||||
def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, logit_biases={}):
|
||||
global maxctx, args, currentusergenkey, totalgens
|
||||
inputs = generation_inputs()
|
||||
outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs))
|
||||
|
@ -340,6 +341,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
|
|||
inputs.stream_sse = stream_sse
|
||||
inputs.quiet = quiet
|
||||
inputs.dynatemp_range = dynatemp_range
|
||||
inputs.dynatemp_exponent = dynatemp_exponent
|
||||
inputs.grammar = grammar.encode("UTF-8")
|
||||
inputs.grammar_retain_state = grammar_retain_state
|
||||
inputs.unban_tokens_rt = not use_default_badwordsids
|
||||
|
@ -558,6 +560,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
trimstop=genparams.get('trim_stop', False),
|
||||
quiet=is_quiet,
|
||||
dynatemp_range=genparams.get('dynatemp_range', 0.0),
|
||||
dynatemp_exponent=genparams.get('dynatemp_exponent', 1.0),
|
||||
logit_biases=genparams.get('logit_bias', {})
|
||||
)
|
||||
|
||||
|
@ -652,8 +655,10 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
await self.send_oai_sse_event('[DONE]')
|
||||
break
|
||||
except Exception as ex:
|
||||
print("SSE streaming was interrupted due to an exception")
|
||||
print("Token streaming was interrupted or aborted!")
|
||||
print(ex)
|
||||
handle.abort_generate()
|
||||
time.sleep(0.2) #short delay
|
||||
|
||||
# flush buffers, sleep a bit to make sure all data sent, and then force close the connection
|
||||
self.wfile.flush()
|
||||
|
@ -665,17 +670,18 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
async def handle_request(self, genparams, api_format, stream_flag):
|
||||
tasks = []
|
||||
|
||||
if stream_flag:
|
||||
tasks.append(self.handle_sse_stream(api_format))
|
||||
|
||||
generate_task = asyncio.create_task(self.generate_text(genparams, api_format, stream_flag))
|
||||
tasks.append(generate_task)
|
||||
|
||||
try:
|
||||
if stream_flag:
|
||||
tasks.append(self.handle_sse_stream(api_format))
|
||||
|
||||
generate_task = asyncio.create_task(self.generate_text(genparams, api_format, stream_flag))
|
||||
tasks.append(generate_task)
|
||||
|
||||
await asyncio.gather(*tasks)
|
||||
generate_result = generate_task.result()
|
||||
return generate_result
|
||||
except (BrokenPipeError, ConnectionAbortedError) as cae: # attempt to abort if connection lost
|
||||
print("An ongoing connection was aborted or interrupted!")
|
||||
print(cae)
|
||||
handle.abort_generate()
|
||||
time.sleep(0.2) #short delay
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue