added interrogate endpoint (+1 squashed commits)

Squashed commits:

[7bf96261] added interrogate endpoint
This commit is contained in:
Concedo 2024-03-11 18:28:23 +08:00
parent e4946b96ea
commit d59ec68753
3 changed files with 101 additions and 4 deletions

View file

@ -997,6 +997,60 @@
]
}
},
"/sdapi/v1/interrogate": {
"post": {
"description": "Generates a short text caption describing an image.",
"requestBody": {
"content": {
"application/json": {
"example": {
"image": "base64_image_data",
"model": "clip"
},
"schema": {
"properties": {
"image": {
"type": "string",
"description": "A base64 string containing the encoded PNG of the image."
},
"model": {
"type": "string",
"description": "Not used."
}
},
"type": "object"
}
}
},
"required": false
},
"responses": {
"200": {
"content": {
"application/json": {
"example":
{
"caption":"A picture of a white cottage with a flagpole."
},
"schema": {
"properties": {
"caption": {
"type": "string",
"description": "A short text description of the image."
}
}
}
}
},
"description": "Successful request"
}
},
"summary": "Generates a short text caption describing an image",
"tags": [
"sdapi/v1"
]
}
},
"/v1/completions": {
"post": {
"summary": "Generates text continuations given a prompt. Please refer to OpenAI documentation",

View file

@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
Current version: 122
Current version: 123
-Concedo
-->
@ -3497,6 +3497,7 @@ Current version: 122
var localmodehost = "localhost";
var kobold_endpoint_version = ""; //used to track problematic versions to avoid sending extra fields
var koboldcpp_version = ""; //detect if we are using koboldcpp
var koboldcpp_has_vision = false;
var last_request_str = "No Requests Available"; //full context of last submitted request
var lastcheckgenkey = ""; //for checking polled-streaming unique id when generating in kcpp
var globalabortcontroller = null;
@ -7013,6 +7014,7 @@ Current version: 122
{
koboldcpp_version = data.version;
console.log("KoboldCpp Detected: " + koboldcpp_version);
koboldcpp_has_vision = (data.vision?true:false);
//also check against kcpp's max true context length
fetch(apply_proxy_url(tmpep + koboldcpp_truemaxctxlen_endpoint))
@ -10717,7 +10719,17 @@ Current version: 122
let origprompt = (savedmeta.prompt?replaceAll(savedmeta.prompt,"\n"," ") : "No Saved Description");
latest_orig_prompt = origprompt;
let visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:((savedmeta.desc||savedmeta.visionmode==3)?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing</span>`));
let hasllava = (is_using_kcpp_with_llava() && koboldcpp_has_vision);
let visionstatus = "";
if(savedmeta.visionmode==3)
{
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(hasllava?`<span class="color_green">Active</span>`:`<span class="color_yellow">Unsupported</span>`));
}
else
{
visionstatus = ((!savedmeta.visionmode || savedmeta.visionmode==0)?`<span class="color_red">Inactive</span>`:(savedmeta.desc?`<span class="color_green">Active</span>`:`<span class="color_yellow">Analyzing</span>`));
}
let togglebtn = `<select class="form-control" id="aivisionmode" style="display:inline;height:24px;width: 134px; padding: 2px; margin: 3px; font-size:12px;" onchange="toggle_ai_vision(\'`+imghash+`\')">
<option value="0">Disabled</option>
<option value="1">Interrogate (Horde)</option>
@ -10725,7 +10737,7 @@ Current version: 122
<option value="3">Multimodal (LLaVA)</option>
</select>`;
document.getElementById("zoomedimgdesc").innerHTML = `
AI Vision: `+visionstatus+` <span class="helpicon">?<span class="helptext">This allows the AI to visually recognize this image, to see and react to this image. Uses Horde or Local A1111 for image interrogation if enabled.</span></span>
AI Vision: `+visionstatus+` <span class="helpicon">?<span class="helptext">This allows the AI to visually recognize this image, to see and react to this image. On KoboldCpp, LLaVA support can be used with multimodal models. Otherwise, uses Horde or Local A1111 for image interrogation if enabled.</span></span>
`+togglebtn+`
<br><button type="button" class="btn btn-primary" style="width: 140px; padding: 2px; margin: 3px; font-size:12px;" onclick="show_orig_prompt()">View Original Prompt</button>
`;

View file

@ -333,6 +333,17 @@ def set_backend_props(inputs):
inputs.vulkan_info = "0".encode("UTF-8")
return inputs
def end_trim_to_sentence(input_text):
    """Trim a generated caption back to its last complete sentence.

    Finds the right-most sentence-ending character (or newline) and cuts the
    text just after it, so a length-truncated generation doesn't end with a
    dangling fragment. If no ender is found past index 0, returns the whole
    input stripped of surrounding whitespace.
    """
    # FIX: the list previously ended with an empty string '' (a mangled '…').
    # str.rfind('') always returns len(input_text), which forced `last` to the
    # end of the string and made the function a no-op (it never trimmed).
    enders = ['.', '!', '?', '*', '"', ')', '}', '`', ']', ';', '…']
    last = -1
    for ender in enders:
        last = max(last, input_text.rfind(ender))
    # A newline also counts as a safe cut point.
    nl = input_text.rfind("\n")
    last = max(last, nl)
    if last > 0:
        # Keep the ender itself, drop everything after it.
        return input_text[:last + 1].strip()
    return input_text.strip()
def load_model(model_filename):
global args
inputs = load_model_inputs()
@ -720,6 +731,12 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
if len(images_added)>0:
genparams["images"] = images_added
elif api_format==5:
firstimg = genparams.get('image', "")
genparams["images"] = [firstimg]
genparams["max_length"] = 32
genparams["prompt"] = "### Instruction: In one sentence, write a descriptive caption for this image.\n### Response:"
return generate(
prompt=genparams.get('prompt', ""),
memory=genparams.get('memory', ""),
@ -776,6 +793,8 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
res = {"id": "chatcmpl-1", "object": "chat.completion", "created": 1, "model": friendlymodelname,
"usage": {"prompt_tokens": 100,"completion_tokens": 100,"total_tokens": 200},
"choices": [{"index": 0, "message":{"role": "assistant", "content": recvtxt,}, "finish_reason": "length"}]}
elif api_format==5:
res = {"caption": end_trim_to_sentence(recvtxt)}
else:
res = {"results": [{"text": recvtxt}]}
@ -1162,7 +1181,7 @@ Enter Prompt:<br>
try:
sse_stream_flag = False
api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat
api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate
is_txt2img = False
if self.path.endswith('/request'):
@ -1181,6 +1200,18 @@ Enter Prompt:<br>
if self.path.endswith('/v1/chat/completions'):
api_format = 4
if self.path.endswith('/sdapi/v1/interrogate'):
has_vision = (mmprojpath!="")
if not has_vision:
self.send_response(503)
self.end_headers(content_type='application/json')
self.wfile.write(json.dumps({"detail": {
"msg": "No LLaVA model loaded",
"type": "service_unavailable",
}}).encode())
return
api_format = 5
if self.path.endswith('/sdapi/v1/txt2img'):
is_txt2img = True