diff --git a/kcpp_adapters/AutoGuess.json b/kcpp_adapters/AutoGuess.json
new file mode 100644
index 000000000..a577753db
--- /dev/null
+++ b/kcpp_adapters/AutoGuess.json
@@ -0,0 +1,113 @@
+[
+{
+    "search": ["<|im_start|>assistant", "<|im_end|>", "<|im_sep|>"],
+    "name": "ChatML (Phi 4)",
+    "adapter": {
+        "system_start": "<|im_start|>system<|im_sep|>",
+        "system_end": "<|im_end|>",
+        "user_start": "<|im_start|>user<|im_sep|>",
+        "user_end": "<|im_end|>",
+        "assistant_start": "<|im_start|>assistant<|im_sep|>",
+        "assistant_end": "<|im_end|>"
+    }
+}, {
+    "search": ["<|im_start|>assistant", "<|im_end|>", "You are provided with function signatures within <tools>"],
+    "name": "ChatML (Qwen 2.5 based).",
+    "adapter": {
+        "system_start": "<|im_start|>system\n\n",
+        "system_end": "<|im_end|>\n\n",
+        "user_start": "<|im_start|>user\n\n",
+        "user_end": "<|im_end|>\n\n",
+        "assistant_start": "<|im_start|>assistant\n\n",
+        "assistant_end": "<|im_end|>\n\n",
+        "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n",
+        "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n"
+    }
+}, {
+    "search": ["<|im_start|>assistant", "<|im_end|>"],
+    "name": "ChatML (Generic).",
+    "adapter": {
+        "system_start": "<|im_start|>system\n\n",
+        "system_end": "<|im_end|>\n\n",
+        "user_start": "<|im_start|>user\n\n",
+        "user_end": "<|im_end|>\n\n",
+        "assistant_start": "<|im_start|>assistant\n\n",
+        "assistant_end": "<|im_end|>\n\n"
+    }
+}, {
+    "search": ["System role not supported", "<start_of_turn>"],
+    "name": "Google Gemma 2.",
+    "adapter": {
+        "user_start": "<start_of_turn>user\n",
+        "user_end": "<end_of_turn>\n",
+        "assistant_start": "<start_of_turn>model\n",
+        "assistant_end": "<end_of_turn>\n"
+    }
+}, {
+    "search": ["<|start_header_id|>system"],
+    "name": "Llama 3.x.",
+    "adapter": {
+        "system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
+        "system_end": "<|eot_id|>\n\n",
+        "user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
+        "user_end": "<|eot_id|>\n\n",
+        "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+        "assistant_end": "<|eot_id|>\n\n"
+    }
+}, {
+    "search": ["[/INST]", "[SYSTEM_PROMPT]"],
+    "name": "Mistral V7 (with system prompt)",
+    "adapter": {
+        "system_start": "[SYSTEM_PROMPT] ",
+        "system_end": "[/SYSTEM_PROMPT]",
+        "user_start": "[INST] ",
+        "user_end": "[/INST]",
+        "assistant_start": " ",
+        "assistant_end": "</s>"
+    }
+}, {
+    "search": ["[/INST]", "\"[INST] \" + system_message"],
+    "name": "Mistral V3",
+    "adapter": {
+        "system_start": "[INST] ",
+        "system_end": "[/INST] ",
+        "user_start": "[INST] ",
+        "user_end": "[/INST] ",
+        "assistant_start": "",
+        "assistant_end": "</s>"
+    }
+}, {
+    "search": ["[/INST]"],
+    "name": "Mistral (Generic)",
+    "adapter": {
+        "system_start": "[INST]",
+        "system_end": "[/INST]\n",
+        "user_start": "[INST]",
+        "user_end": "[/INST]\n",
+        "assistant_start": "",
+        "assistant_end": "</s>"
+    }
+}, {
+    "search": ["<|system|>", "<|user|>"],
+    "name": "Phi 3.5",
+    "adapter": {
+        "system_start": "<|system|>\n",
+        "system_end": "<|end|>\n",
+        "user_start": "<|user|>\n",
+        "user_end": "<|end|>\n",
+        "assistant_start": "<|assistant|>\n",
+        "assistant_end": "<|end|>\n"
+    }
+}, {
+    "search": ["<|START_OF_TURN_TOKEN|>"],
+    "name": "Cohere (Aya Expanse 32B based)",
+    "adapter": {
+        "system_start": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
+        "system_end": "<|END_OF_TURN_TOKEN|>",
+        "user_start": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
+        "user_end": "<|END_OF_TURN_TOKEN|>",
+        "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+        "assistant_end": "<|END_OF_TURN_TOKEN|>"
+    }
+}
+]
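Each entry above pairs a "search" list with an "adapter": an entry matches when every search string occurs in the model's embedded chat template, and the first match wins, which is why the more specific variants (Phi 4, Qwen 2.5) are ordered before the generic ChatML and Mistral fallbacks. A minimal standalone sketch of that selection logic, assuming the file sits at kcpp_adapters/AutoGuess.json; the sample template string is made up for illustration:

    import json

    def guess_adapter(chat_template, candidates):
        # First entry whose search strings ALL occur in the template wins,
        # so specific entries must be listed before generic fallbacks.
        for entry in candidates:
            if all(s in chat_template for s in entry["search"]):
                return entry["name"], entry["adapter"]
        return None, None

    with open("kcpp_adapters/AutoGuess.json", encoding="utf-8") as f:
        candidates = json.load(f)

    # Made-up Llama-3-style template, for illustration only.
    template = "<|start_header_id|>system<|end_header_id|>...<|eot_id|>"
    name, adapter = guess_adapter(template, candidates)
    print(name)  # -> "Llama 3.x."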
"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>", + "user_end": "<|END_OF_TURN_TOKEN|>", + "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", + "assistant_end": "<|END_OF_TURN_TOKEN|>" + } +} +] diff --git a/koboldcpp.py b/koboldcpp.py index 5793098c7..119178558 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4648,6 +4648,29 @@ def main(launch_args,start_server=True): exitcounter = 999 exit_with_error(3,"Could not load text model: " + modelname) + if ( + chatcompl_adapter is not None + and isinstance(chatcompl_adapter, list) + ): + # The chat completions adapter is a list that needs derivation from chat templates + # Try to derive chat completions adapter from chat template, now that we have the model loaded + ctbytes = handle.get_chat_template() + chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore") + candidates = chatcompl_adapter + chatcompl_adapter = None + if chat_template != "": + for entry in candidates: + if all(s in chat_template for s in entry['search']): + print(f"Chat completion heuristic: {entry['name']}") + chatcompl_adapter = entry['adapter'] + break + if chatcompl_adapter is None: + print("Chat template heuristics failed to identify chat completions format. Alpaca will be used.") + + if chatcompl_adapter is None and not args.chatcompletionsadapter: + print("Note: Alpaca format will be used for OpenAI Compatible API chat completions. Use --chatcompletionsadapter=AutoGuess to use chat template heuristics.") + + #handle loading image model if args.sdmodel and args.sdmodel!="": imgmodel = args.sdmodel