mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 17:14:36 +00:00)
prompt command done
commit 03adb90dc6 (parent 853d57c53c)
2 changed files with 20 additions and 7 deletions
@@ -29,8 +29,8 @@ jobs:
       - name: Test
         id: test
         run: |
-          wget https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf
-          dist/koboldcpp-mac-arm64 --model KobbleTiny-Q4_K.gguf --gpulayers 99 --benchmark dist/bench.csv
+          wget https://huggingface.co/concedo/koboldcpp/resolve/main/baby_llama.gguf
+          dist/koboldcpp-mac-arm64 --model baby_llama.gguf --gpulayers 99 --benchmark --prompt 'Hi, my name is'

       - name: Save artifact
         uses: actions/upload-artifact@v3
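The test step now downloads a much smaller model (baby_llama.gguf) and runs it with the new --prompt flag alongside --benchmark, so the macOS CI job smoke-tests one-shot prompt generation instead of writing dist/bench.csv.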
23 koboldcpp.py
@@ -875,15 +875,17 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
                 inputs.images[n] = "".encode("UTF-8")
             else:
                 inputs.images[n] = images[n].encode("UTF-8")
-    if max_length >= (max_context_length-1):
-        max_length = max_context_length-1
-        print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
     global showmaxctxwarning
     if max_context_length > maxctx:
         if showmaxctxwarning:
             print(f"\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. It will be reduced to fit. Consider launching with increased --contextsize to avoid errors. This message will only show once per session.)")
             showmaxctxwarning = False
         max_context_length = maxctx
+    min_remain = min(max_context_length-4, 16)
+    if max_length >= (max_context_length-min_remain):
+        max_length = max_context_length-min_remain
+        print("\nWarning: You are trying to generate with max_length near or exceeding max_context_length. Most of the context will be removed, and your outputs will not be very coherent.")
+
     inputs.max_context_length = max_context_length # this will resize the context buffer if changed
     inputs.max_length = max_length
     inputs.temperature = temperature
@@ -1471,7 +1473,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
         current_token = 0
         incomplete_token_buffer = bytearray()
         async_sleep_short = 0.02
-        await asyncio.sleep(0.25) #anti race condition, prevent check from overtaking generate
+        await asyncio.sleep(0.3) #anti race condition, prevent check from overtaking generate
         try:
             tokenReserve = "" #keeps fully formed tokens that we cannot send out yet
             while True:
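The streaming handler deliberately sleeps before it starts polling so that generation gets a head start; this change widens that margin from 0.25s to 0.3s. A minimal, self-contained sketch of the delay-then-poll pattern (the names and plain-list queue are illustrative, not the real handler):

    import asyncio

    async def generator(queue):
        for tok in ["Hello", " ", "world"]:
            await asyncio.sleep(0.1)  # pretend per-token latency
            queue.append(tok)
        queue.append(None)            # sentinel: generation finished

    async def streamer(queue):
        await asyncio.sleep(0.3)      # head start for the generator (the anti-race delay)
        while True:
            if queue:
                tok = queue.pop(0)
                if tok is None:
                    return
                print(tok, end="", flush=True)
            else:
                await asyncio.sleep(0.02)  # poll interval, like async_sleep_short

    async def main():
        queue = []
        await asyncio.gather(generator(queue), streamer(queue))

    asyncio.run(main())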
@@ -2042,6 +2044,7 @@ Enter Prompt:<br>
                     return
+
         finally:
             time.sleep(0.05)
             modelbusy.release()

         self.send_response(404)
@@ -4209,10 +4212,20 @@ def main(launch_args,start_server=True):
         save_to_file = (args.benchmark and args.benchmark!="stdout" and args.benchmark!="")
         benchmaxctx = maxctx
         benchlen = 100
         benchtemp = 0.1
         benchtopk = 1
         benchreppen = 1
+        benchbaneos = True
         benchmodel = sanitize_string(os.path.splitext(os.path.basename(modelname))[0])
+        benchprompt = ""
+        if args.prompt:
+            benchprompt = args.prompt
+            benchtopk = 100
+            benchreppen = 1.07
+            benchtemp = 0.8
+            if not args.benchmark:
+                benchbaneos = False
+                benchlen = 256
         if args.benchmark:
             if os.path.exists(args.benchmark) and os.path.getsize(args.benchmark) > 1000000:
                 print(f"\nWarning: The benchmark CSV output file you selected exceeds 1MB. This is probably not what you want, did you select the wrong CSV file?\nFor safety, benchmark output will not be saved.")
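In effect, passing --prompt switches the benchmark's greedy, EOS-banned sampling over to normal creative sampling, and a bare --prompt run (no --benchmark) additionally allows the EOS token and extends the reply to 256 tokens. A hypothetical helper that mirrors the defaults and overrides above (not in the source):

    def pick_bench_settings(prompt, benchmark):
        s = {"temp": 0.1, "top_k": 1, "rep_pen": 1, "ban_eos": True, "length": 100}
        if prompt:
            s.update(temp=0.8, top_k=100, rep_pen=1.07)  # creative sampling for a user prompt
            if not benchmark:
                s.update(ban_eos=False, length=256)      # plain --prompt run: allow EOS, longer reply
        return s

    print(pick_bench_settings("Hi, my name is", None))
    # {'temp': 0.8, 'top_k': 100, 'rep_pen': 1.07, 'ban_eos': False, 'length': 256}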
@@ -4225,7 +4238,7 @@ def main(launch_args,start_server=True):
             benchprompt = "1111111111111111"
             for i in range(0,14): #generate massive prompt
                 benchprompt += benchprompt
-        genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,ban_eos_token=True)
+        genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=benchtemp,top_k=benchtopk,rep_pen=benchreppen,ban_eos_token=benchbaneos)
         result = genout['text']
         if args.prompt and not args.benchmark:
             restore_stdout()
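For scale: the synthetic benchmark prompt doubles a 16-character seed 14 times, giving 16 * 2**14 = 262144 characters, more than enough to saturate benchmaxctx at any realistic context size:

    benchprompt = "1111111111111111"  # 16 characters
    for i in range(0, 14):            # each pass doubles the string
        benchprompt += benchprompt
    assert len(benchprompt) == 16 * 2**14 == 262144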