diff --git a/kcpp_docs.embd b/kcpp_docs.embd index a76177628..21e22f4ff 100644 --- a/kcpp_docs.embd +++ b/kcpp_docs.embd @@ -1170,7 +1170,7 @@ }, "/api/extra/tokencount": { "post": { - "description": "Counts the number of tokens in a string.", + "description": "Counts the number of tokens in a string, and returns their token IDs. Also aliased to /api/extra/tokenize", "requestBody": { "content": { "application/json": { @@ -1209,8 +1209,19 @@ ] }, "schema": { - "$ref": "#/components/schemas/ValueResult" - } + "properties": { + "value": { + "type": "number" + }, + "ids": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "type": "object" + } } }, "description": "Successful request" @@ -1222,6 +1233,61 @@ ] } }, + "/api/extra/detokenize": { + "post": { + "description": "Converts an array of token IDs into a string.", + "requestBody": { + "content": { + "application/json": { + "example": { + "ids": [529,29989,5205,29989,29958,13] + }, + "schema": { + "properties": { + "ids": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "type": "object" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/json": { + "example": { + "result": "<|system|>\n", + "success": true + }, + "schema": { + "properties": { + "result": { + "type": "string", + "description": "The detokenized string." + }, + "success": { + "type": "boolean" + } + }, + "type": "object" + } + } + }, + "description": "Successful request" + } + }, + "summary": "Converts an array of token IDs into a string.", + "tags": [ + "api/extra" + ] + } + }, "/api/extra/abort": { "post": { "description": "Aborts the currently ongoing text generation. Does not work when multiple requests are in queue.", diff --git a/koboldcpp.py b/koboldcpp.py index 11fed1002..21e512e8a 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1332,7 +1332,7 @@ def parse_last_logprobs(lastlogprobs): def transform_genparams(genparams, api_format): global chatcompl_adapter - #api format 1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama + #api format 1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama,7=ollamachat #alias all nonstandard alternative names for rep pen. rp1 = genparams.get('repeat_penalty', 1.0) rp2 = genparams.get('repetition_penalty', 1.0) @@ -1350,7 +1350,7 @@ def transform_genparams(genparams, api_format): elif api_format==2: pass - elif api_format==3 or api_format==4: + elif api_format==3 or api_format==4 or api_format==7: default_max_tok = (400 if api_format==4 else 200) genparams["max_length"] = genparams.get('max_tokens', genparams.get('max_completion_tokens', default_max_tok)) presence_penalty = genparams.get('presence_penalty', genparams.get('frequency_penalty', 0.0)) @@ -1364,7 +1364,7 @@ def transform_genparams(genparams, api_format): genparams["sampler_seed"] = tryparseint(genparams.get('seed', -1)) genparams["mirostat"] = genparams.get('mirostat_mode', 0) - if api_format==4: + if api_format==4 or api_format==7: #handle ollama chat here too # translate openai chat completion messages format into one big string. messages_array = genparams.get('messages', []) default_adapter = {} if chatcompl_adapter is None else chatcompl_adapter @@ -1482,6 +1482,7 @@ ws ::= | " " | "\n" [ \t]{0,20} user_message_start = adapter_obj.get("user_start", "### Instruction:") assistant_message_start = adapter_obj.get("assistant_start", "### Response:") genparams["prompt"] = f"{user_message_start} In one sentence, write a descriptive caption for this image.\n{assistant_message_start}" + elif api_format==6: detokstr = "" tokids = genparams.get('context', []) @@ -1606,6 +1607,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): oldprompt = genparams.get('ollamabodyprompt', "") tokarr = tokenize_ids(oldprompt+recvtxt,False) res = {"model": friendlymodelname,"created_at": str(datetime.now(timezone.utc).isoformat()),"response":recvtxt,"done": True,"context": tokarr,"total_duration": 1,"load_duration": 1,"prompt_eval_count": prompttokens,"prompt_eval_duration": 1,"eval_count": comptokens,"eval_duration": 1} + elif api_format == 7: + res = {"model": friendlymodelname,"created_at": str(datetime.now(timezone.utc).isoformat()),"message":{"role":"assistant","content":recvtxt},"done": True,"total_duration": 1,"load_duration": 1,"prompt_eval_count": prompttokens,"prompt_eval_duration": 1,"eval_count": comptokens,"eval_duration": 1} else: res = {"results": [{"text": recvtxt, "finish_reason": currfinishreason, "logprobs":logprobsdict, "prompt_tokens": prompttokens, "completion_tokens": comptokens}]} @@ -2250,7 +2253,7 @@ Enter Prompt:
try: sse_stream_flag = False - api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama + api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama,7=ollamachat is_imggen = False is_transcribe = False @@ -2284,6 +2287,8 @@ Enter Prompt:
if self.path.endswith('/api/generate'): api_format = 6 + if self.path.endswith('/api/chat'): + api_format = 7 if self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'): is_imggen = True