diff --git a/expose.cpp b/expose.cpp
index ba660e061..b9c57f7ea 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -275,14 +275,11 @@ extern "C"
return (int)last_stop_reason;
}
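+    // kept alive for the life of the process so the pointer returned by get_chat_template() below stays valid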
+ static std::string chat_template = "";
const char* get_chat_template() {
- // we need to keep this around
- static std::string* ct = nullptr;
- if (ct == nullptr) {
- ct = new std::string();
- }
- *ct = gpttype_get_chat_template();
- return ct->c_str();
+ chat_template = gpttype_get_chat_template();
+ return chat_template.c_str();
}
const char* get_pending_output() {
diff --git a/koboldcpp.py b/koboldcpp.py
index 6530d68c8..9751aef52 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1914,6 +1914,13 @@ Enter Prompt:
elif self.path.endswith(('/.well-known/serviceinfo')):
response_body = (json.dumps({"version":"0.2","software":{"name":"KoboldCpp","version":KcppVersion,"repository":"https://github.com/LostRuins/koboldcpp","homepage":"https://github.com/LostRuins/koboldcpp","logo":"https://raw.githubusercontent.com/LostRuins/koboldcpp/refs/heads/concedo/niko.ico"},"api":{"koboldai":{"name":"KoboldAI API","rel_url":"/api","documentation":"https://lite.koboldai.net/koboldcpp_api","version":KcppVersion},"openai":{"name":"OpenAI API","rel_url ":"/v1","documentation":"https://openai.com/documentation/api","version":KcppVersion}}}).encode())
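+            # llama.cpp-style /props endpoint: expose the model's chat template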
+ elif self.path=="/props":
+ ctbytes = handle.get_chat_template()
+ chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore")
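+                # total_slots is hardcoded to 1 here (llama.cpp fills this from params.n_parallel)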
+ response_body = (json.dumps({"chat_template":chat_template,"total_slots":1}).encode())
+
elif self.path=="/api" or self.path=="/docs" or self.path.startswith(('/api/?json=','/api?json=','/docs/?json=','/docs?json=')):
content_type = 'text/html'
if embedded_kcpp_docs is None:
@@ -1957,13 +1964,6 @@ Enter Prompt:
self.send_header("location", self.path)
self.end_headers(content_type='text/html')
return None
- elif self.path.endswith('/props'):
- ctbytes = handle.get_chat_template()
- chat_template = ctypes.string_at(ctbytes).decode("UTF-8")
- # TODO: decide whether to add or skip below settings from llama.cpp /props endpoint.
- # { "default_generation_settings", ctx_server.default_generation_settings_for_props },
- # { "total_slots", ctx_server.params.n_parallel },
- response_body = (json.dumps({"chat_template":chat_template}).encode())
if response_body is None:
self.send_response(404)