use a static buffer for kv reloads instead. also, added into lite ui

This commit is contained in:
Concedo 2025-06-03 22:32:46 +08:00
parent 4b57108508
commit 53f1511396
6 changed files with 239 additions and 99 deletions

View file

@ -440,7 +440,7 @@
"info": {
"title": "KoboldCpp API",
"description": "For swagger.json, <a href=\"?json=1\">click here</a> or use <a href=\"https://lite.koboldai.net/koboldcpp_api.json\">online version</a>.",
"version": "2025.01.08"
"version": "2025.06.03"
},
"openapi": "3.0.3",
"paths": {
@ -639,7 +639,7 @@
"application/json": {
"example": {
"result": "KoboldCpp",
"version": "2025.01.08",
"version": "2025.06.03",
"protected": false,
"txt2img": false,
"vision": false,
@ -1909,8 +1909,10 @@
"application/json": {
"example": {
"success": true,
"old_state": 0,
"new_state": 0
"old_state_size": 0,
"old_tokens": 0,
"new_state_size": 0,
"new_tokens": 0
},
"schema": {
"properties": {
@ -1918,13 +1920,21 @@
"type": "boolean",
"description": "Whether the operation was successful."
},
"old_state": {
"old_state_size": {
"type": "number",
"description": "Bytes currently in use for the existing save state."
},
"new_state": {
"old_tokens": {
"type": "number",
"description": "How many tokens are in the currently existing save state."
},
"new_state_size": {
"type": "number",
"description": "Bytes a new save state is estimated to consume."
},
"new_tokens": {
"type": "number",
"description": "How many tokens will be stored if a new save state is made."
}
}
}
@ -1947,13 +1957,23 @@
"content": {
"application/json": {
"example": {
"success": true
"success": true,
"new_state_size": 12345678,
"new_tokens": 100
},
"schema": {
"properties": {
"success": {
"type": "boolean",
"description": "Whether the operation was successful."
},
"new_state_size": {
"type": "number",
"description": "Bytes a new save state is estimated to consume."
},
"new_tokens": {
"type": "number",
"description": "How many context tokens were saved in state."
}
}
}
@ -1976,13 +1996,18 @@
"content": {
"application/json": {
"example": {
"success": true
"success": true,
"new_tokens": 100
},
"schema": {
"properties": {
"success": {
"type": "boolean",
"description": "Whether the operation was successful."
},
"new_tokens": {
"type": "number",
"description": "How many context tokens were loaded from state."
}
}
}
@ -2423,7 +2448,7 @@
"/v1/completions": {
"post": {
"summary": "Generates text continuations given a prompt. Please refer to OpenAI documentation",
"description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions)",
"description": "Generates text continuations given a prompt.\n\nThis is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/completions](https://platform.openai.com/docs/api-reference/completions). All KoboldCpp samplers are supported, please refer to /api/v1/generate for more details.",
"requestBody": {
"content": {
"application/json": {
@ -2445,7 +2470,7 @@
"/v1/chat/completions": {
"post": {
"summary": "Generates a response from a list of messages. Please refer to OpenAI documentation",
"description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat)",
"description": "Given a list of messages comprising a conversation, the model will return a response.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://platform.openai.com/docs/api-reference/chat](https://platform.openai.com/docs/api-reference/chat). All KoboldCpp samplers are supported, please refer to /api/v1/generate for more details.",
"requestBody": {
"content": {
"application/json": {