mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
fix for chat templates and drafting
This commit is contained in:
parent
03def285db
commit
cca4a934dd
3 changed files with 27 additions and 11 deletions
|
@ -601,10 +601,18 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
printf("Error: Draft model vocab of (%d) does not match base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab);
|
int diff = abs(draftvocab-base_n_vocab);
|
||||||
printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n");
|
if(diff <= 256)
|
||||||
llama_free(draft_ctx);
|
{
|
||||||
draft_ctx = nullptr;
|
//allow small differences to work
|
||||||
|
printf("WARNING: Draft model vocab of (%d) does not match base vocab of (%d).\nSpeculative decoding may malfunction!\n",draftvocab,base_n_vocab);
|
||||||
|
} else {
|
||||||
|
printf("Error: Draft model vocab of (%d) is too different from base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab);
|
||||||
|
printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n");
|
||||||
|
llama_free(draft_ctx);
|
||||||
|
draft_ctx = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
8
kcpp_adapters/DeepSeek-V2.json
Normal file
8
kcpp_adapters/DeepSeek-V2.json
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
{
|
||||||
|
"system_start": "",
|
||||||
|
"system_end": "",
|
||||||
|
"user_start": "<|User|>",
|
||||||
|
"user_end": "",
|
||||||
|
"assistant_start": "<|Assistant|>",
|
||||||
|
"assistant_end": "<|end▁of▁sentence|>"
|
||||||
|
}
|
14
koboldcpp.py
14
koboldcpp.py
|
@ -59,7 +59,7 @@ maxhordelen = 400
|
||||||
modelbusy = threading.Lock()
|
modelbusy = threading.Lock()
|
||||||
requestsinqueue = 0
|
requestsinqueue = 0
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.82.3"
|
KcppVersion = "1.82.4"
|
||||||
showdebug = True
|
showdebug = True
|
||||||
guimode = False
|
guimode = False
|
||||||
showsamplerwarning = True
|
showsamplerwarning = True
|
||||||
|
@ -3421,7 +3421,7 @@ def show_gui():
|
||||||
def on_picked_model_file(filepath):
|
def on_picked_model_file(filepath):
|
||||||
if filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.kcppt'):
|
if filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.kcppt'):
|
||||||
#load it as a config file instead
|
#load it as a config file instead
|
||||||
with open(filepath, 'r') as f:
|
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
dict = json.load(f)
|
dict = json.load(f)
|
||||||
import_vars(dict)
|
import_vars(dict)
|
||||||
|
|
||||||
|
@ -4014,7 +4014,7 @@ def show_gui():
|
||||||
try:
|
try:
|
||||||
if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
|
if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
|
||||||
print("Embedding chat completions adapter...") # parse and save embedded preload story
|
print("Embedding chat completions adapter...") # parse and save embedded preload story
|
||||||
with open(args.chatcompletionsadapter, 'r') as f:
|
with open(args.chatcompletionsadapter, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
args.chatcompletionsadapter = json.load(f)
|
args.chatcompletionsadapter = json.load(f)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
@ -4025,7 +4025,7 @@ def show_gui():
|
||||||
try:
|
try:
|
||||||
if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
|
if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
|
||||||
print("Embedding preload story...") # parse and save embedded preload story
|
print("Embedding preload story...") # parse and save embedded preload story
|
||||||
with open(args.preloadstory, 'r') as f:
|
with open(args.preloadstory, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
args.preloadstory = json.load(f)
|
args.preloadstory = json.load(f)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
@ -4283,7 +4283,7 @@ def show_gui():
|
||||||
if not filename or filename=="":
|
if not filename or filename=="":
|
||||||
return
|
return
|
||||||
runmode_untouched = False
|
runmode_untouched = False
|
||||||
with open(filename, 'r') as f:
|
with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
dict = json.load(f)
|
dict = json.load(f)
|
||||||
import_vars(dict)
|
import_vars(dict)
|
||||||
pass
|
pass
|
||||||
|
@ -4761,7 +4761,7 @@ def unload_libs():
|
||||||
|
|
||||||
def load_config_cli(filename):
|
def load_config_cli(filename):
|
||||||
print("Loading .kcpps configuration file...")
|
print("Loading .kcpps configuration file...")
|
||||||
with open(filename, 'r') as f:
|
with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
config = json.load(f)
|
config = json.load(f)
|
||||||
args.istemplate = False
|
args.istemplate = False
|
||||||
raw_args = (sys.argv[1:]) #a lousy hack to allow for overriding kcpps
|
raw_args = (sys.argv[1:]) #a lousy hack to allow for overriding kcpps
|
||||||
|
@ -4990,7 +4990,7 @@ def main(launch_args,start_server=True):
|
||||||
ccadapter_path = os.path.abspath(premade_adapt_path)
|
ccadapter_path = os.path.abspath(premade_adapt_path)
|
||||||
if ccadapter_path:
|
if ccadapter_path:
|
||||||
print(f"Loading Chat Completions Adapter: {ccadapter_path}")
|
print(f"Loading Chat Completions Adapter: {ccadapter_path}")
|
||||||
with open(ccadapter_path, 'r') as f:
|
with open(ccadapter_path, 'r', encoding='utf-8', errors='replace') as f:
|
||||||
chatcompl_adapter = json.load(f)
|
chatcompl_adapter = json.load(f)
|
||||||
canload = True
|
canload = True
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue