mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 09:04:36 +00:00)

commit 6a1dd57435 (parent 7dc72db9de)

gemma3 template, updated lite, fixed tool calling, reenable ctx shift for gemma3

5 changed files with 78 additions and 8 deletions
@@ -2129,11 +2129,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         printf("Qwen2VL detected! Mrope will be used, and context shift will be disabled!\n");
         kcpp_data->use_contextshift = false;
     }
-    if(file_format_meta.model_architecture == GGUFArch::ARCH_GEMMA3)
-    {
-        printf("Gemma3 detected! Context shift will be disabled!\n");
-        kcpp_data->use_contextshift = false;
-    }
     model_params.main_gpu = cu_parseinfo_maindevice;

     #if defined(GGML_USE_CUDA)
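The deleted branch above is what previously forced context shift off whenever a Gemma 3 model was detected; removing it re-enables context shifting for Gemma 3, while the Qwen2VL case above it still disables shifting. A minimal sketch of the resulting behavior (hypothetical Python helper, not the actual C++):

```python
# Hypothetical helper summarizing which architectures still disable
# context shift after this commit (illustrative, not koboldcpp code).
def context_shift_enabled(arch: str, user_requested: bool = True) -> bool:
    # Qwen2VL still turns context shift off because it uses Mrope;
    # the Gemma3 special case is deleted by this commit.
    return user_requested and arch not in {"QWEN2VL"}

assert context_shift_enabled("GEMMA3")       # re-enabled by this commit
assert not context_shift_enabled("QWEN2VL")  # still disabled
```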
@@ -43,6 +43,15 @@
             "assistant_start": "<start_of_turn>model\n",
             "assistant_end": "<end_of_turn>\n"
         }
+    }, {
+        "search": ["<start_of_image>", "<start_of_turn>", "<end_of_turn>"],
+        "name": "Google Gemma 3.",
+        "adapter": {
+            "user_start": "<start_of_turn>user\n",
+            "user_end": "<end_of_turn>\n",
+            "assistant_start": "<start_of_turn>model\n",
+            "assistant_end": "<end_of_turn>\n"
+        }
     }, {
         "search": ["<|start_header_id|>system"],
         "name": "Llama 3.x.",
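For context, AutoGuess entries like this are matched against the model's embedded chat template, and the new Gemma 3 entry keys on the vision token `<start_of_image>` in addition to the usual turn markers. A rough sketch of that selection, assuming a simple first-match rule where every search marker must appear (the exact matching logic lives in koboldcpp itself):

```python
import json

# Illustrative adapter selection: first entry whose every "search" marker
# occurs in the model's chat template wins (assumed semantics).
def pick_adapter(adapters, chat_template):
    for entry in adapters:
        if all(marker in chat_template for marker in entry["search"]):
            return entry["name"], entry["adapter"]
    return None

with open("AutoGuess.json") as f:  # hypothetical local copy of the file above
    adapters = json.load(f)

template = "...<start_of_image>...<start_of_turn>user...<end_of_turn>..."
print(pick_adapter(adapters, template))  # expected: the Gemma 3 entry under this rule
```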
@@ -2070,6 +2070,18 @@
             "post": {
                 "summary": "Generates text continuations given a prompt. Please refer to OpenAI documentation",
                 "description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions)",
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "example": {},
+                            "schema": {
+                                "properties": {},
+                                "type": "object"
+                            }
+                        }
+                    },
+                    "required": true
+                },
                 "tags": [
                     "v1"
                 ],
@@ -2080,6 +2092,18 @@
             "post": {
                 "summary": "Generates a response from a list of messages. Please refer to OpenAI documentation",
                 "description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat)",
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "example": {},
+                            "schema": {
+                                "properties": {},
+                                "type": "object"
+                            }
+                        }
+                    },
+                    "required": true
+                },
                 "tags": [
                     "v1"
                 ],
@@ -2110,6 +2134,18 @@
             "post": {
                 "summary": "Generates Text-To-Speech audio from input text. Please refer to OpenAI documentation",
                 "description": "Generates Text-To-Speech audio from input text.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/audio/createSpeech](https://platform.openai.com/docs/api-reference/audio/createSpeech)",
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "example": {},
+                            "schema": {
+                                "properties": {},
+                                "type": "object"
+                            }
+                        }
+                    },
+                    "required": true
+                },
                 "tags": [
                     "v1"
                 ],
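All three hunks fill in previously missing requestBody stubs, so the rendered API docs show a JSON body editor for these OpenAI-compatible routes. A hedged usage sketch against a locally running instance (the default `http://localhost:5001` address is assumed; adjust as needed):

```python
import json
import urllib.request

# Minimal OpenAI-style chat request to KoboldCpp's compatibility endpoint.
payload = {
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "max_tokens": 32,
}
req = urllib.request.Request(
    "http://localhost:5001/v1/chat/completions",  # assumed default host/port
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)
print(body["choices"][0]["message"]["content"])
```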
klite.embd (20 changed lines)
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
 -->

 <script>
-const LITEVER = 222;
+const LITEVER = 223;
 const urlParams = new URLSearchParams(window.location.search);
 var localflag = true;
 const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -3284,7 +3284,7 @@ Current version indicated by LITEVER below.
         },
         {
         "id":4,
-        "name":"Gemma 2",
+        "name":"Gemma 2 & 3",
         "user":"<start_of_turn>user\\n",
         "user_end":"<end_of_turn>\\n",
         "assistant":"<start_of_turn>model\\n",
@@ -3381,6 +3381,16 @@ Current version indicated by LITEVER below.
         "assistant_end":"<|end▁of▁sentence|>",
         "system":"",
         "system_end":"",
+        },
+        {
+        "id":14,
+        "name":"KoboldCppAutoGuess",
+        "user":"{{[INPUT]}}",
+        "user_end":"",
+        "assistant":"{{[OUTPUT]}}",
+        "assistant_end":"",
+        "system":"{{[SYSTEM]}}",
+        "system_end":"",
         }
     ];

@@ -14369,6 +14379,12 @@ Current version indicated by LITEVER below.
         submit_payload.params.banned_tokens = get_token_bans();
         submit_payload.params.render_special = localsettings.render_special_tags;
         submit_payload.params.logprobs = localsettings.request_logprobs;
+        let st = get_instruct_starttag(true);
+        let et = get_instruct_endtag(true);
+        if(st=="{{[INPUT]}}" && et=="{{[OUTPUT]}}")
+        {
+            submit_payload.params.autoguess_format = true;
+        }
     }
     if(custom_kobold_endpoint != "" && is_using_kcpp_with_dry() && localsettings.dry_multiplier > 0)
     {
koboldcpp.py (16 changed lines)
@@ -1157,6 +1157,20 @@ def generate(genparams, stream_flag=False):
     banned_tokens = genparams.get('banned_tokens', banned_strings)
     bypass_eos_token = genparams.get('bypass_eos', False)
     custom_token_bans = genparams.get('custom_token_bans', '')
+    autoguess_format = genparams.get('autoguess_format', False)
+    if autoguess_format:
+        adapter_obj = {} if chatcompl_adapter is None else chatcompl_adapter
+        system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
+        user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
+        user_message_end = adapter_obj.get("user_end", "")
+        assistant_message_start = adapter_obj.get("assistant_start", "\n### Response:\n")
+        assistant_message_end = adapter_obj.get("assistant_end", "")
+        prompt = prompt.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
+        prompt = prompt.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
+        prompt = prompt.replace("{{[SYSTEM]}}", system_message_start)
+        memory = memory.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
+        memory = memory.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
+        memory = memory.replace("{{[SYSTEM]}}", system_message_start)

     for tok in custom_token_bans.split(','):
         tok = tok.strip() # Remove leading/trailing whitespace
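The new autoguess_format branch is easiest to read with concrete values. Re-running just the replacement logic above, using the Gemma 3 adapter added earlier in this commit as the sample `chatcompl_adapter`:

```python
# Sample values: the Gemma 3 adapter from this commit's AutoGuess.json hunk.
adapter_obj = {
    "user_start": "<start_of_turn>user\n",
    "user_end": "<end_of_turn>\n",
    "assistant_start": "<start_of_turn>model\n",
    "assistant_end": "<end_of_turn>\n",
}
user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
user_message_end = adapter_obj.get("user_end", "")
assistant_message_start = adapter_obj.get("assistant_start", "\n### Response:\n")
assistant_message_end = adapter_obj.get("assistant_end", "")

# Lite sends generic placeholders; the server rewrites them into real tags.
prompt = "{{[INPUT]}}Hello there!{{[OUTPUT]}}"
prompt = prompt.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
prompt = prompt.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
print(prompt)
# <end_of_turn>
# <start_of_turn>user
# Hello there!<end_of_turn>
# <start_of_turn>model
```

This pairs with the klite.embd change above: Lite sets `autoguess_format` only when the selected instruct preset is the new KoboldCppAutoGuess entry, whose tags are exactly these placeholders.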
@@ -1866,7 +1880,7 @@ def transform_genparams(genparams, api_format):
             if message['role'] == "user" and message_index == len(messages_array):
                 # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
                 tools_array = genparams.get('tools', [])
-                if tools_array and len(tools_array) > 0 and genparams.get('tool_choice',None) is not None:
+                if tools_array and len(tools_array) > 0 :
                     response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}]
                     json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0)
                     tools_string = json.dumps(tools_array, indent=0)
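This hunk is the tool-calling fix from the commit message: previously the tools array was injected into the prompt only when the request also carried an explicit non-null `tool_choice`; now any non-empty `tools` array is enough. A small before/after comparison with a hypothetical request body:

```python
# Hypothetical OpenAI-style request: tools supplied, tool_choice omitted.
genparams = {
    "tools": [{"type": "function", "function": {"name": "get_weather"}}],
}
tools_array = genparams.get('tools', [])

# Old predicate: skipped injection for this request (tool_choice is absent).
old = bool(tools_array and len(tools_array) > 0
           and genparams.get('tool_choice', None) is not None)

# New predicate: a non-empty tools array alone triggers injection.
new = bool(tools_array and len(tools_array) > 0)

print(old, new)  # False True
```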