mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
PoC: add chat template heuristics (#1283)
* PoC: add chat template heuristics. The fallback chat template adapter of Vicuna is not ideal in some cases (e.g. a test against a sub-portion of the BBC news classification task on Kaggle gave 82% accuracy with Vicuna and 88% with the official ChatML format for a q4_k_m Qwen 2.5 3B-Instruct gguf). This PR adds a simple proof-of-concept heuristic which looks at the chat template and upgrades the adapter when it is able to. * Gemma 2 heuristic * Phi 4 and Llama 3.x heuristics * better Qwen-vs-generic heuristic * cleanup * Mistral (generic) heuristic * fix the system message for Mistral * Phi 3.5 * Mistral v3 * Cohere (Aya Expanse 32B based) * only derive from the chat template when AutoGuess is used * add notes about Alpaca fallbacks * added an AutoGuess.json dummy * add Mistral v7 * switch to using a JSON list with search strings
This commit is contained in:
parent
3e6ef8e0ef
commit
23ec550835
2 changed files with 136 additions and 0 deletions
23
koboldcpp.py
23
koboldcpp.py
|
@ -4648,6 +4648,29 @@ def main(launch_args,start_server=True):
|
|||
exitcounter = 999
|
||||
exit_with_error(3,"Could not load text model: " + modelname)
|
||||
|
||||
if (
|
||||
chatcompl_adapter is not None
|
||||
and isinstance(chatcompl_adapter, list)
|
||||
):
|
||||
||||
# The chat completions adapter is a list, meaning it still needs to be derived from the chat template
|
||||
# Try to derive the chat completions adapter from the chat template, now that the model is loaded
|
||||
ctbytes = handle.get_chat_template()
|
||||
chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore")
|
||||
candidates = chatcompl_adapter
|
||||
chatcompl_adapter = None
|
||||
if chat_template != "":
|
||||
for entry in candidates:
|
||||
if all(s in chat_template for s in entry['search']):
|
||||
print(f"Chat completion heuristic: {entry['name']}")
|
||||
chatcompl_adapter = entry['adapter']
|
||||
break
|
||||
if chatcompl_adapter is None:
|
||||
print("Chat template heuristics failed to identify chat completions format. Alpaca will be used.")
|
||||
|
||||
if chatcompl_adapter is None and not args.chatcompletionsadapter:
|
||||
print("Note: Alpaca format will be used for OpenAI Compatible API chat completions. Use --chatcompletionsadapter=AutoGuess to use chat template heuristics.")
|
||||
|
||||
|
||||
#handle loading image model
|
||||
if args.sdmodel and args.sdmodel!="":
|
||||
imgmodel = args.sdmodel
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue