Fixed compile errors, made mmap automatic when LoRA is selected, added updated quantizers and quantization handling for GPT-NeoX, GPT-2, and GPT-J

This commit is contained in:
Concedo 2023-04-24 23:20:06 +08:00
parent 3962eb39c7
commit 59fb174678
11 changed files with 297 additions and 590 deletions

View file

@@ -99,6 +99,8 @@ def load_model(model_filename):
inputs.threads = args.threads
inputs.f16_kv = True
inputs.use_mmap = (not args.nommap)
if args.lora and args.lora!="":
inputs.use_mmap = False
inputs.use_smartcontext = args.smartcontext
inputs.unban_tokens = args.unbantokens
inputs.blasbatchsize = args.blasbatchsize
@@ -141,7 +143,7 @@ maxctx = 2048
maxlen = 128
modelbusy = False
defaultport = 5001
KcppVersion = "1.13"
KcppVersion = "1.13.1"
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
sys_version = ""