gemma3 template, updated lite, fixed tool calling, re-enable ctx shift for gemma3

This commit is contained in:
Concedo 2025-03-14 17:47:01 +08:00
parent 7dc72db9de
commit 6a1dd57435
5 changed files with 78 additions and 8 deletions

View file

@@ -2129,11 +2129,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
printf("Qwen2VL detected! Mrope will be used, and context shift will be disabled!\n"); printf("Qwen2VL detected! Mrope will be used, and context shift will be disabled!\n");
kcpp_data->use_contextshift = false; kcpp_data->use_contextshift = false;
} }
if(file_format_meta.model_architecture == GGUFArch::ARCH_GEMMA3)
{
printf("Gemma3 detected! Context shift will be disabled!\n");
kcpp_data->use_contextshift = false;
}
model_params.main_gpu = cu_parseinfo_maindevice; model_params.main_gpu = cu_parseinfo_maindevice;
#if defined(GGML_USE_CUDA) #if defined(GGML_USE_CUDA)

View file

@@ -43,6 +43,15 @@
"assistant_start": "<start_of_turn>model\n", "assistant_start": "<start_of_turn>model\n",
"assistant_end": "<end_of_turn>\n" "assistant_end": "<end_of_turn>\n"
} }
}, {
"search": ["<start_of_image>", "<start_of_turn>", "<end_of_turn>"],
"name": "Google Gemma 3.",
"adapter": {
"user_start": "<start_of_turn>user\n",
"user_end": "<end_of_turn>\n",
"assistant_start": "<start_of_turn>model\n",
"assistant_end": "<end_of_turn>\n"
}
}, { }, {
"search": ["<|start_header_id|>system"], "search": ["<|start_header_id|>system"],
"name": "Llama 3.x.", "name": "Llama 3.x.",

View file

@@ -2070,6 +2070,18 @@
"post": { "post": {
"summary": "Generates text continuations given a prompt. Please refer to OpenAI documentation", "summary": "Generates text continuations given a prompt. Please refer to OpenAI documentation",
"description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions)", "description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions)",
"requestBody": {
"content": {
"application/json": {
"example": {},
"schema": {
"properties": {},
"type": "object"
}
}
},
"required": true
},
"tags": [ "tags": [
"v1" "v1"
], ],
@@ -2080,6 +2092,18 @@
"post": { "post": {
"summary": "Generates a response from a list of messages. Please refer to OpenAI documentation", "summary": "Generates a response from a list of messages. Please refer to OpenAI documentation",
"description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat)", "description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat)",
"requestBody": {
"content": {
"application/json": {
"example": {},
"schema": {
"properties": {},
"type": "object"
}
}
},
"required": true
},
"tags": [ "tags": [
"v1" "v1"
], ],
@@ -2110,6 +2134,18 @@
"post": { "post": {
"summary": "Generates Text-To-Speech audio from input text. Please refer to OpenAI documentation", "summary": "Generates Text-To-Speech audio from input text. Please refer to OpenAI documentation",
"description": "Generates Text-To-Speech audio from input text.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/audio/createSpeech](https://platform.openai.com/docs/api-reference/audio/createSpeech)", "description": "Generates Text-To-Speech audio from input text.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/audio/createSpeech](https://platform.openai.com/docs/api-reference/audio/createSpeech)",
"requestBody": {
"content": {
"application/json": {
"example": {},
"schema": {
"properties": {},
"type": "object"
}
}
},
"required": true
},
"tags": [ "tags": [
"v1" "v1"
], ],

View file

@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
--> -->
<script> <script>
const LITEVER = 222; const LITEVER = 223;
const urlParams = new URLSearchParams(window.location.search); const urlParams = new URLSearchParams(window.location.search);
var localflag = true; var localflag = true;
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_"; const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -3284,7 +3284,7 @@ Current version indicated by LITEVER below.
}, },
{ {
"id":4, "id":4,
"name":"Gemma 2", "name":"Gemma 2 & 3",
"user":"<start_of_turn>user\\n", "user":"<start_of_turn>user\\n",
"user_end":"<end_of_turn>\\n", "user_end":"<end_of_turn>\\n",
"assistant":"<start_of_turn>model\\n", "assistant":"<start_of_turn>model\\n",
@@ -3381,6 +3381,16 @@ Current version indicated by LITEVER below.
"assistant_end":"<end▁of▁sentence>", "assistant_end":"<end▁of▁sentence>",
"system":"", "system":"",
"system_end":"", "system_end":"",
},
{
"id":14,
"name":"KoboldCppAutoGuess",
"user":"{{[INPUT]}}",
"user_end":"",
"assistant":"{{[OUTPUT]}}",
"assistant_end":"",
"system":"{{[SYSTEM]}}",
"system_end":"",
} }
]; ];
@@ -14369,6 +14379,12 @@ Current version indicated by LITEVER below.
submit_payload.params.banned_tokens = get_token_bans(); submit_payload.params.banned_tokens = get_token_bans();
submit_payload.params.render_special = localsettings.render_special_tags; submit_payload.params.render_special = localsettings.render_special_tags;
submit_payload.params.logprobs = localsettings.request_logprobs; submit_payload.params.logprobs = localsettings.request_logprobs;
let st = get_instruct_starttag(true);
let et = get_instruct_endtag(true);
if(st=="{{[INPUT]}}" && et=="{{[OUTPUT]}}")
{
submit_payload.params.autoguess_format = true;
}
} }
if(custom_kobold_endpoint != "" && is_using_kcpp_with_dry() && localsettings.dry_multiplier > 0) if(custom_kobold_endpoint != "" && is_using_kcpp_with_dry() && localsettings.dry_multiplier > 0)
{ {

View file

@@ -1157,6 +1157,20 @@ def generate(genparams, stream_flag=False):
banned_tokens = genparams.get('banned_tokens', banned_strings) banned_tokens = genparams.get('banned_tokens', banned_strings)
bypass_eos_token = genparams.get('bypass_eos', False) bypass_eos_token = genparams.get('bypass_eos', False)
custom_token_bans = genparams.get('custom_token_bans', '') custom_token_bans = genparams.get('custom_token_bans', '')
autoguess_format = genparams.get('autoguess_format', False)
if autoguess_format:
adapter_obj = {} if chatcompl_adapter is None else chatcompl_adapter
system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
user_message_end = adapter_obj.get("user_end", "")
assistant_message_start = adapter_obj.get("assistant_start", "\n### Response:\n")
assistant_message_end = adapter_obj.get("assistant_end", "")
prompt = prompt.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
prompt = prompt.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
prompt = prompt.replace("{{[SYSTEM]}}", system_message_start)
memory = memory.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
memory = memory.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
memory = memory.replace("{{[SYSTEM]}}", system_message_start)
for tok in custom_token_bans.split(','): for tok in custom_token_bans.split(','):
tok = tok.strip() # Remove leading/trailing whitespace tok = tok.strip() # Remove leading/trailing whitespace
@@ -1866,7 +1880,7 @@ def transform_genparams(genparams, api_format):
if message['role'] == "user" and message_index == len(messages_array): if message['role'] == "user" and message_index == len(messages_array):
# Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
tools_array = genparams.get('tools', []) tools_array = genparams.get('tools', [])
if tools_array and len(tools_array) > 0 and genparams.get('tool_choice',None) is not None: if tools_array and len(tools_array) > 0 :
response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}] response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}]
json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0) json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0)
tools_string = json.dumps(tools_array, indent=0) tools_string = json.dumps(tools_array, indent=0)