Fixed compile errors, made mmap automatic when LoRA is selected, added updated quantizers and quantization handling for GPT-NeoX, GPT-2, and GPT-J

This commit is contained in:
Concedo 2023-04-24 23:20:06 +08:00
parent 3962eb39c7
commit 59fb174678
11 changed files with 297 additions and 590 deletions

View file

@@ -99,6 +99,8 @@ def load_model(model_filename):
inputs.threads = args.threads
inputs.f16_kv = True
inputs.use_mmap = (not args.nommap)
if args.lora and args.lora!="":
inputs.use_mmap = False
inputs.use_smartcontext = args.smartcontext
inputs.unban_tokens = args.unbantokens
inputs.blasbatchsize = args.blasbatchsize
@@ -141,7 +143,7 @@ maxctx = 2048
maxlen = 128
modelbusy = False
defaultport = 5001
KcppVersion = "1.13"
KcppVersion = "1.13.1"
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
sys_version = ""