prompt command done

Concedo 2024-08-07 20:52:28 +08:00
parent 853d57c53c
commit 03adb90dc6
2 changed files with 20 additions and 7 deletions


@@ -29,8 +29,8 @@ jobs:
       - name: Test
         id: test
         run: |
-          wget https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf
-          dist/koboldcpp-mac-arm64 --model KobbleTiny-Q4_K.gguf --gpulayers 99 --benchmark dist/bench.csv
+          wget https://huggingface.co/concedo/koboldcpp/resolve/main/baby_llama.gguf
+          dist/koboldcpp-mac-arm64 --model baby_llama.gguf --gpulayers 99 --benchmark --prompt 'Hi, my name is'
       - name: Save artifact
         uses: actions/upload-artifact@v3
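Note: the CI test step now fetches a much smaller test model and exercises the new --prompt path instead of writing a benchmark CSV. A rough local equivalent of that step, as a Python sketch (the binary path assumes a finished mac-arm64 build in dist/ and may differ on your machine):

import subprocess, urllib.request

# fetch the tiny test model used by the workflow
url = "https://huggingface.co/concedo/koboldcpp/resolve/main/baby_llama.gguf"
urllib.request.urlretrieve(url, "baby_llama.gguf")

subprocess.run(
    ["dist/koboldcpp-mac-arm64",
     "--model", "baby_llama.gguf",
     "--gpulayers", "99",
     "--benchmark",                  # no CSV argument: results go to stdout
     "--prompt", "Hi, my name is"],  # the new prompt-driven bench path
    check=True,
)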


@@ -875,15 +875,17 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
             inputs.images[n] = "".encode("UTF-8")
         else:
             inputs.images[n] = images[n].encode("UTF-8")
-    if max_length >= (max_context_length-1):
-        max_length = max_context_length-1
-        print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
     global showmaxctxwarning
     if max_context_length > maxctx:
         if showmaxctxwarning:
             print(f"\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. It will be reduced to fit. Consider launching with increased --contextsize to avoid errors. This message will only show once per session.)")
             showmaxctxwarning = False
         max_context_length = maxctx
+    min_remain = min(max_context_length-4, 16)
+    if max_length >= (max_context_length-min_remain):
+        max_length = max_context_length-min_remain
+        print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
     inputs.max_context_length = max_context_length # this will resize the context buffer if changed
     inputs.max_length = max_length
     inputs.temperature = temperature
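The max_length clamp moves below the maxctx reduction so it acts on the final context size, and it now reserves a small remainder of context for the prompt instead of a single token. A minimal standalone sketch of the new clamp (the function wrapper is ours, not from the source):

def clamp_max_length(max_length, max_context_length):
    # reserve up to 16 context tokens for the prompt, or ctx-4 for
    # very small contexts, instead of the old single-token reserve
    min_remain = min(max_context_length - 4, 16)
    if max_length >= (max_context_length - min_remain):
        max_length = max_context_length - min_remain
    return max_length

print(clamp_max_length(512, 512))  # 496 (was 511 under the old clamp)
print(clamp_max_length(100, 512))  # 100, untouched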
@@ -1471,7 +1473,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         current_token = 0
         incomplete_token_buffer = bytearray()
         async_sleep_short = 0.02
-        await asyncio.sleep(0.25) #anti race condition, prevent check from overtaking generate
+        await asyncio.sleep(0.3) #anti race condition, prevent check from overtaking generate
         try:
             tokenReserve = "" #keeps fully formed tokens that we cannot send out yet
             while True:
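Bumping the initial sleep from 0.25s to 0.3s widens the head start the streaming check gives generate(). A toy illustration of the race it guards against (names and timings are illustrative only, not from the source):

import asyncio

in_flight = False

async def generate():
    global in_flight
    await asyncio.sleep(0.1)   # setup work before the busy flag is raised
    in_flight = True
    await asyncio.sleep(0.5)   # token generation
    in_flight = False

async def check_stream():
    await asyncio.sleep(0.3)   # anti-race head start, as in the diff
    # polling any earlier could observe in_flight == False and wrongly
    # conclude that generation never started
    print("generation in flight:", in_flight)  # True

async def main():
    await asyncio.gather(generate(), check_stream())

asyncio.run(main())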
@@ -2042,6 +2044,7 @@ Enter Prompt:<br>
                     return
             finally:
+                time.sleep(0.05)
                 modelbusy.release()
         self.send_response(404)
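The new time.sleep(0.05) adds a short grace period before the busy lock is released, so a queued request cannot grab the model in the same instant the previous response is still tearing down. Schematically (assuming modelbusy is a standard semaphore, which the .release() call suggests but the diff does not show):

import threading, time

modelbusy = threading.Semaphore(1)  # assumption: a counting busy lock

def handle_generate_request():
    if not modelbusy.acquire(blocking=False):
        return "busy"                # concurrent request is refused
    try:
        return "generated text"      # real generation happens here
    finally:
        time.sleep(0.05)             # grace period from this commit
        modelbusy.release()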
@@ -4209,10 +4212,20 @@ def main(launch_args,start_server=True):
         save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
         benchmaxctx = maxctx
         benchlen = 100
+        benchtemp = 0.1
+        benchtopk = 1
+        benchreppen = 1
+        benchbaneos = True
         benchmodel = sanitize_string(os.path.splitext(os.path.basename(modelname))[0])
         benchprompt = ""
         if args.prompt:
             benchprompt = args.prompt
+            benchtopk = 100
+            benchreppen = 1.07
+            benchtemp = 0.8
+            if not args.benchmark:
+                benchbaneos = False
+                benchlen = 256
         if args.benchmark:
             if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
                 print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
@@ -4225,7 +4238,7 @@ def main(launch_args,start_server=True):
             benchprompt = "1111111111111111"
             for i in range(0,14): #generate massive prompt
                 benchprompt += benchprompt
-        genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,ban_eos_token=True)
+        genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=benchtemp,top_k=benchtopk,rep_pen=benchreppen,ban_eos_token=benchbaneos)
         result = genout['text']
         if args.prompt and not args.benchmark:
             restore_stdout()
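Taken together, the benchmark path now has two personalities: deterministic greedy sampling for pure --benchmark runs, and normal creative sampling when a --prompt is supplied, with EOS allowed and a longer output budget when no benchmark CSV is requested. A condensed restatement of that decision logic (the helper name is ours):

def pick_bench_params(has_prompt, has_benchmark):
    # defaults: greedy, repetition-pen off, EOS banned -> reproducible bench
    temp, topk, reppen, baneos, length = 0.1, 1, 1, True, 100
    if has_prompt:
        temp, topk, reppen = 0.8, 100, 1.07   # normal creative sampling
        if not has_benchmark:
            baneos = False    # let the model stop on its own
            length = 256      # give the answer more room
    return temp, topk, reppen, baneos, length

print(pick_bench_params(False, True))   # (0.1, 1, 1, True, 100)
print(pick_bench_params(True, False))   # (0.8, 100, 1.07, False, 256)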