mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
Fixed compile errors, made mmap get disabled automatically when a LoRA is selected, and added updated quantizers and quantization handling for GPT-NeoX, GPT-2, and GPT-J.
This commit is contained in:
parent
3962eb39c7
commit
59fb174678
11 changed files with 297 additions and 590 deletions
|
@ -99,6 +99,8 @@ def load_model(model_filename):
|
|||
inputs.threads = args.threads
|
||||
inputs.f16_kv = True
|
||||
inputs.use_mmap = (not args.nommap)
|
||||
if args.lora and args.lora!="":
|
||||
inputs.use_mmap = False
|
||||
inputs.use_smartcontext = args.smartcontext
|
||||
inputs.unban_tokens = args.unbantokens
|
||||
inputs.blasbatchsize = args.blasbatchsize
|
||||
|
@ -141,7 +143,7 @@ maxctx = 2048
|
|||
maxlen = 128
|
||||
modelbusy = False
|
||||
defaultport = 5001
|
||||
KcppVersion = "1.13"
|
||||
KcppVersion = "1.13.1"
|
||||
|
||||
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||
sys_version = ""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue