mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 09:04:36 +00:00)

commit 6a1dd57435 (parent 7dc72db9de)

gemma3 template, updated lite, fixed tool calling, reenable ctx shift for gemma3

5 changed files with 78 additions and 8 deletions
@@ -2129,11 +2129,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         printf("Qwen2VL detected! Mrope will be used, and context shift will be disabled!\n");
         kcpp_data->use_contextshift = false;
     }
-    if(file_format_meta.model_architecture == GGUFArch::ARCH_GEMMA3)
-    {
-        printf("Gemma3 detected! Context shift will be disabled!\n");
-        kcpp_data->use_contextshift = false;
-    }
     model_params.main_gpu = cu_parseinfo_maindevice;

     #if defined(GGML_USE_CUDA)
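The deleted branch above is what previously forced context shift off whenever a Gemma 3 model was detected; removing it re-enables context shifting for Gemma 3, while the Qwen2VL case above it still disables shifting. A minimal sketch of the resulting behavior (hypothetical Python helper, not the actual C++):

```python
# Hypothetical helper summarizing which architectures still disable
# context shift after this commit (illustrative, not koboldcpp code).
def context_shift_enabled(arch: str, user_requested: bool = True) -> bool:
    # Qwen2VL still turns context shift off because it uses Mrope;
    # the Gemma3 special case is deleted by this commit.
    return user_requested and arch not in {"QWEN2VL"}

assert context_shift_enabled("GEMMA3")       # re-enabled by this commit
assert not context_shift_enabled("QWEN2VL")  # still disabled
```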
@@ -43,6 +43,15 @@
             "assistant_start": "<start_of_turn>model\n",
             "assistant_end": "<end_of_turn>\n"
         }
+    }, {
+        "search": ["<start_of_image>", "<start_of_turn>", "<end_of_turn>"],
+        "name": "Google Gemma 3.",
+        "adapter": {
+            "user_start": "<start_of_turn>user\n",
+            "user_end": "<end_of_turn>\n",
+            "assistant_start": "<start_of_turn>model\n",
+            "assistant_end": "<end_of_turn>\n"
+        }
     }, {
         "search": ["<|start_header_id|>system"],
         "name": "Llama 3.x.",
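For context, AutoGuess entries like this are matched against the model's embedded chat template, and the new Gemma 3 entry keys on the vision token `<start_of_image>` in addition to the usual turn markers. A rough sketch of that selection, assuming a simple first-match rule where every search marker must appear (the exact matching logic lives in koboldcpp itself):

```python
import json

# Illustrative adapter selection: first entry whose every "search" marker
# occurs in the model's chat template wins (assumed semantics).
def pick_adapter(adapters, chat_template):
    for entry in adapters:
        if all(marker in chat_template for marker in entry["search"]):
            return entry["name"], entry["adapter"]
    return None

with open("AutoGuess.json") as f:  # hypothetical local copy of the file above
    adapters = json.load(f)

template = "...<start_of_image>...<start_of_turn>user...<end_of_turn>..."
print(pick_adapter(adapters, template))  # expected: the Gemma 3 entry under this rule
```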
@@ -2070,6 +2070,18 @@
             "post": {
                 "summary": "Generates text continuations given a prompt. Please refer to OpenAI documentation",
                 "description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions)",
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "example": {},
+                            "schema": {
+                                "properties": {},
+                                "type": "object"
+                            }
+                        }
+                    },
+                    "required": true
+                },
                 "tags": [
                     "v1"
                 ],
@@ -2080,6 +2092,18 @@
             "post": {
                 "summary": "Generates a response from a list of messages. Please refer to OpenAI documentation",
                 "description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat)",
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "example": {},
+                            "schema": {
+                                "properties": {},
+                                "type": "object"
+                            }
+                        }
+                    },
+                    "required": true
+                },
                 "tags": [
                     "v1"
                 ],
@@ -2110,6 +2134,18 @@
             "post": {
                 "summary": "Generates Text-To-Speech audio from input text. Please refer to OpenAI documentation",
                 "description": "Generates Text-To-Speech audio from input text.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/audio/createSpeech](https://platform.openai.com/docs/api-reference/audio/createSpeech)",
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "example": {},
+                            "schema": {
+                                "properties": {},
+                                "type": "object"
+                            }
+                        }
+                    },
+                    "required": true
+                },
                 "tags": [
                     "v1"
                 ],
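All three hunks fill in previously missing requestBody stubs, so the rendered API docs show a JSON body editor for these OpenAI-compatible routes. A hedged usage sketch against a locally running instance (the default `http://localhost:5001` address is assumed; adjust as needed):

```python
import json
import urllib.request

# Minimal OpenAI-style chat request to KoboldCpp's compatibility endpoint.
payload = {
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "max_tokens": 32,
}
req = urllib.request.Request(
    "http://localhost:5001/v1/chat/completions",  # assumed default host/port
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)
print(body["choices"][0]["message"]["content"])
```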
klite.embd (20 changed lines)
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
 -->

 <script>
-const LITEVER = 222;
+const LITEVER = 223;
 const urlParams = new URLSearchParams(window.location.search);
 var localflag = true;
 const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -3284,7 +3284,7 @@ Current version indicated by LITEVER below.
         },
         {
         "id":4,
-        "name":"Gemma 2",
+        "name":"Gemma 2 & 3",
         "user":"<start_of_turn>user\\n",
         "user_end":"<end_of_turn>\\n",
         "assistant":"<start_of_turn>model\\n",
@@ -3381,6 +3381,16 @@ Current version indicated by LITEVER below.
         "assistant_end":"<|end▁of▁sentence|>",
         "system":"",
         "system_end":"",
+        },
+        {
+        "id":14,
+        "name":"KoboldCppAutoGuess",
+        "user":"{{[INPUT]}}",
+        "user_end":"",
+        "assistant":"{{[OUTPUT]}}",
+        "assistant_end":"",
+        "system":"{{[SYSTEM]}}",
+        "system_end":"",
         }
     ];

@@ -14369,6 +14379,12 @@ Current version indicated by LITEVER below.
         submit_payload.params.banned_tokens = get_token_bans();
         submit_payload.params.render_special = localsettings.render_special_tags;
         submit_payload.params.logprobs = localsettings.request_logprobs;
+        let st = get_instruct_starttag(true);
+        let et = get_instruct_endtag(true);
+        if(st=="{{[INPUT]}}" && et=="{{[OUTPUT]}}")
+        {
+            submit_payload.params.autoguess_format = true;
+        }
     }
     if(custom_kobold_endpoint != "" && is_using_kcpp_with_dry() && localsettings.dry_multiplier > 0)
     {
koboldcpp.py (16 changed lines)
@@ -1157,6 +1157,20 @@ def generate(genparams, stream_flag=False):
     banned_tokens = genparams.get('banned_tokens', banned_strings)
     bypass_eos_token = genparams.get('bypass_eos', False)
     custom_token_bans = genparams.get('custom_token_bans', '')
+    autoguess_format = genparams.get('autoguess_format', False)
+    if autoguess_format:
+        adapter_obj = {} if chatcompl_adapter is None else chatcompl_adapter
+        system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
+        user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
+        user_message_end = adapter_obj.get("user_end", "")
+        assistant_message_start = adapter_obj.get("assistant_start", "\n### Response:\n")
+        assistant_message_end = adapter_obj.get("assistant_end", "")
+        prompt = prompt.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
+        prompt = prompt.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
+        prompt = prompt.replace("{{[SYSTEM]}}", system_message_start)
+        memory = memory.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
+        memory = memory.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
+        memory = memory.replace("{{[SYSTEM]}}", system_message_start)

     for tok in custom_token_bans.split(','):
         tok = tok.strip() # Remove leading/trailing whitespace
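The new autoguess_format branch is easiest to read with concrete values. Re-running just the replacement logic above, using the Gemma 3 adapter added earlier in this commit as the sample `chatcompl_adapter`:

```python
# Sample values: the Gemma 3 adapter from this commit's AutoGuess.json hunk.
adapter_obj = {
    "user_start": "<start_of_turn>user\n",
    "user_end": "<end_of_turn>\n",
    "assistant_start": "<start_of_turn>model\n",
    "assistant_end": "<end_of_turn>\n",
}
user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
user_message_end = adapter_obj.get("user_end", "")
assistant_message_start = adapter_obj.get("assistant_start", "\n### Response:\n")
assistant_message_end = adapter_obj.get("assistant_end", "")

# Lite sends generic placeholders; the server rewrites them into real tags.
prompt = "{{[INPUT]}}Hello there!{{[OUTPUT]}}"
prompt = prompt.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
prompt = prompt.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
print(prompt)
# <end_of_turn>
# <start_of_turn>user
# Hello there!<end_of_turn>
# <start_of_turn>model
```

This pairs with the klite.embd change above: Lite sets `autoguess_format` only when the selected instruct preset is the new KoboldCppAutoGuess entry, whose tags are exactly these placeholders.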
@@ -1866,7 +1880,7 @@ def transform_genparams(genparams, api_format):
             if message['role'] == "user" and message_index == len(messages_array):
                 # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
                 tools_array = genparams.get('tools', [])
-                if tools_array and len(tools_array) > 0 and genparams.get('tool_choice',None) is not None:
+                if tools_array and len(tools_array) > 0 :
                     response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}]
                     json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0)
                     tools_string = json.dumps(tools_array, indent=0)
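This hunk is the tool-calling fix from the commit message: previously the tools array was injected into the prompt only when the request also carried an explicit non-null `tool_choice`; now any non-empty `tools` array is enough. A small before/after comparison with a hypothetical request body:

```python
# Hypothetical OpenAI-style request: tools supplied, tool_choice omitted.
genparams = {
    "tools": [{"type": "function", "function": {"name": "get_weather"}}],
}
tools_array = genparams.get('tools', [])

# Old predicate: skipped injection for this request (tool_choice is absent).
old = bool(tools_array and len(tools_array) > 0
           and genparams.get('tool_choice', None) is not None)

# New predicate: a non-empty tools array alone triggers injection.
new = bool(tools_array and len(tools_array) > 0)

print(old, new)  # False True
```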