fix for chat templates and drafting

This commit is contained in:
Concedo 2025-01-23 11:49:40 +08:00
parent 03def285db
commit cca4a934dd
3 changed files with 27 additions and 11 deletions

View file

@ -601,10 +601,18 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll
} }
else else
{ {
printf("Error: Draft model vocab of (%d) does not match base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab); int diff = abs(draftvocab-base_n_vocab);
printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n"); if(diff <= 256)
llama_free(draft_ctx); {
draft_ctx = nullptr; //allow small differences to work
printf("WARNING: Draft model vocab of (%d) does not match base vocab of (%d).\nSpeculative decoding may malfunction!\n",draftvocab,base_n_vocab);
} else {
printf("Error: Draft model vocab of (%d) is too different from base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab);
printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n");
llama_free(draft_ctx);
draft_ctx = nullptr;
}
} }
} }
} }

View file

@ -0,0 +1,8 @@
{
"system_start": "",
"system_end": "",
"user_start": "<User>",
"user_end": "",
"assistant_start": "<Assistant>",
"assistant_end": "<end▁of▁sentence>"
}

View file

@ -59,7 +59,7 @@ maxhordelen = 400
modelbusy = threading.Lock() modelbusy = threading.Lock()
requestsinqueue = 0 requestsinqueue = 0
defaultport = 5001 defaultport = 5001
KcppVersion = "1.82.3" KcppVersion = "1.82.4"
showdebug = True showdebug = True
guimode = False guimode = False
showsamplerwarning = True showsamplerwarning = True
@ -3421,7 +3421,7 @@ def show_gui():
def on_picked_model_file(filepath): def on_picked_model_file(filepath):
if filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.kcppt'): if filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.kcppt'):
#load it as a config file instead #load it as a config file instead
with open(filepath, 'r') as f: with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
dict = json.load(f) dict = json.load(f)
import_vars(dict) import_vars(dict)
@ -4014,7 +4014,7 @@ def show_gui():
try: try:
if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter): if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
print("Embedding chat completions adapter...") # parse and save embedded preload story print("Embedding chat completions adapter...") # parse and save embedded preload story
with open(args.chatcompletionsadapter, 'r') as f: with open(args.chatcompletionsadapter, 'r', encoding='utf-8', errors='ignore') as f:
args.chatcompletionsadapter = json.load(f) args.chatcompletionsadapter = json.load(f)
except Exception: except Exception:
pass pass
@ -4025,7 +4025,7 @@ def show_gui():
try: try:
if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory): if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
print("Embedding preload story...") # parse and save embedded preload story print("Embedding preload story...") # parse and save embedded preload story
with open(args.preloadstory, 'r') as f: with open(args.preloadstory, 'r', encoding='utf-8', errors='ignore') as f:
args.preloadstory = json.load(f) args.preloadstory = json.load(f)
except Exception: except Exception:
pass pass
@ -4283,7 +4283,7 @@ def show_gui():
if not filename or filename=="": if not filename or filename=="":
return return
runmode_untouched = False runmode_untouched = False
with open(filename, 'r') as f: with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
dict = json.load(f) dict = json.load(f)
import_vars(dict) import_vars(dict)
pass pass
@ -4761,7 +4761,7 @@ def unload_libs():
def load_config_cli(filename): def load_config_cli(filename):
print("Loading .kcpps configuration file...") print("Loading .kcpps configuration file...")
with open(filename, 'r') as f: with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
config = json.load(f) config = json.load(f)
args.istemplate = False args.istemplate = False
raw_args = (sys.argv[1:]) #a lousy hack to allow for overriding kcpps raw_args = (sys.argv[1:]) #a lousy hack to allow for overriding kcpps
@ -4990,7 +4990,7 @@ def main(launch_args,start_server=True):
ccadapter_path = os.path.abspath(premade_adapt_path) ccadapter_path = os.path.abspath(premade_adapt_path)
if ccadapter_path: if ccadapter_path:
print(f"Loading Chat Completions Adapter: {ccadapter_path}") print(f"Loading Chat Completions Adapter: {ccadapter_path}")
with open(ccadapter_path, 'r') as f: with open(ccadapter_path, 'r', encoding='utf-8', errors='replace') as f:
chatcompl_adapter = json.load(f) chatcompl_adapter = json.load(f)
canload = True canload = True
else: else: