prompt command done

Concedo 2024-08-07 20:52:28 +08:00
parent 853d57c53c
commit 03adb90dc6
2 changed files with 20 additions and 7 deletions


@@ -29,8 +29,8 @@ jobs:
- name: Test
id: test
run: |
wget https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf
dist/koboldcpp-mac-arm64 --model KobbleTiny-Q4_K.gguf --gpulayers 99 --benchmark dist/bench.csv
wget https://huggingface.co/concedo/koboldcpp/resolve/main/baby_llama.gguf
dist/koboldcpp-mac-arm64 --model baby_llama.gguf --gpulayers 99 --benchmark --prompt 'Hi, my name is'
- name: Save artifact
uses: actions/upload-artifact@v3
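
Note: the Test step now downloads a much smaller model (baby_llama.gguf instead of the 1.1B KobbleTiny quant) and runs the mac-arm64 binary with --benchmark plus the new --prompt flag, so the CI job also smoke-tests prompt-driven generation; the previous command wrote benchmark results to dist/bench.csv instead. The remaining hunks below are in the Python launcher/server code.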


@@ -875,15 +875,17 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
inputs.images[n] = "".encode("UTF-8")
else:
inputs.images[n] = images[n].encode("UTF-8")
if max_length >= (max_context_length-1):
max_length = max_context_length-1
print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
global showmaxctxwarning
if max_context_length > maxctx:
if showmaxctxwarning:
print(f"\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. It will be reduced to fit. Consider launching with increased --contextsize to avoid errors. This message will only show once per session.)")
showmaxctxwarning = False
max_context_length = maxctx
min_remain = min(max_context_length-4, 16)
if max_length >= (max_context_length-min_remain):
max_length = max_context_length-min_remain
print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
inputs.max_context_length = max_context_length # this will resize the context buffer if changed
inputs.max_length = max_length
inputs.temperature = temperature
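
The new clamp above reserves a small slice of the context window for the prompt instead of the single token the old check kept. A minimal standalone sketch of just that logic (not the full generate() function), with a couple of worked values:

def clamp_max_length(max_length, max_context_length):
    # reserve at least min_remain tokens of context for the prompt:
    # normally 16, less when the context window itself is tiny
    min_remain = min(max_context_length - 4, 16)
    if max_length >= (max_context_length - min_remain):
        max_length = max_context_length - min_remain
    return max_length

# e.g. clamp_max_length(512, 512) -> 496 (the old check would have allowed 511)
#      clamp_max_length(100, 512) -> 100 (well under the limit, unchanged)
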
@@ -1471,7 +1473,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
current_token = 0
incomplete_token_buffer = bytearray()
async_sleep_short = 0.02
await asyncio.sleep(0.25) #anti race condition, prevent check from overtaking generate
await asyncio.sleep(0.3) #anti race condition, prevent check from overtaking generate
try:
tokenReserve = "" #keeps fully formed tokens that we cannot send out yet
while True:
@@ -2042,6 +2044,7 @@ Enter Prompt:<br>
return
finally:
time.sleep(0.05)
modelbusy.release()
self.send_response(404)
@@ -4209,10 +4212,20 @@ def main(launch_args,start_server=True):
save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
benchmaxctx = maxctx
benchlen = 100
benchtemp = 0.1
benchtopk = 1
benchreppen = 1
benchbaneos = True
benchmodel = sanitize_string(os.path.splitext(os.path.basename(modelname))[0])
benchprompt = ""
if args.prompt:
benchprompt = args.prompt
benchtopk = 100
benchreppen = 1.07
benchtemp = 0.8
if not args.benchmark:
benchbaneos = False
benchlen = 256
if args.benchmark:
if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
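
Restating the sampler settings introduced above (a minimal sketch of the same branching, not code lifted from the file): a plain --benchmark run keeps greedy, deterministic settings; --prompt switches to more natural sampling; and --prompt without --benchmark also re-enables the EOS token and allows a longer reply. The returned values map onto the parameterized generate() call in the next hunk.

def resolve_bench_settings(prompt_given, benchmark_given):
    # defaults used by --benchmark alone: greedy and deterministic
    benchlen, benchtemp, benchtopk, benchreppen, benchbaneos = 100, 0.1, 1, 1, True
    if prompt_given:
        # --prompt supplied: sample more naturally over the user's prompt
        benchtemp, benchtopk, benchreppen = 0.8, 100, 1.07
        if not benchmark_given:
            # prompt-only mode: let EOS stop generation and allow a longer reply
            benchbaneos = False
            benchlen = 256
    return dict(max_length=benchlen, temperature=benchtemp, top_k=benchtopk,
                rep_pen=benchreppen, ban_eos_token=benchbaneos)
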
@@ -4225,7 +4238,7 @@ def main(launch_args,start_server=True):
benchprompt = "1111111111111111"
for i in range(0,14): #generate massive prompt
benchprompt += benchprompt
genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,ban_eos_token=True)
genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=benchtemp,top_k=benchtopk,rep_pen=benchreppen,ban_eos_token=benchbaneos)
result = genout['text']
if args.prompt and not args.benchmark:
restore_stdout()
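
Taken together with the workflow change, a prompt-only run (no --benchmark) would presumably look something like dist/koboldcpp-mac-arm64 --model baby_llama.gguf --prompt 'Hi, my name is'; that exact invocation is inferred from the workflow line and the args.prompt handling above rather than shown in this commit. In that mode stdout is restored, presumably so the generated text can be written straight to the console.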