mirror of https://github.com/LostRuins/koboldcpp.git
synced 2025-09-12 18:09:42 +00:00

commit ab41e324d6
Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	Makefile
#	README.md
#	examples/server/CMakeLists.txt
#	ggml/src/CMakeLists.txt

6 changed files with 28 additions and 5 deletions
@@ -1487,7 +1487,7 @@ class StableLMModel(Model):
         raise ValueError(f"Unprocessed norms: {norms}")


-@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
+@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
 class LlamaModel(Model):
     model_arch = gguf.MODEL_ARCH.LLAMA

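Context for the hunk above: the converter dispatches on the `architectures` string from a Hugging Face config, so registering the older "LLaMAForCausalLM" spelling lets legacy checkpoints resolve to the same LlamaModel class. A minimal sketch of that name-to-factory registry pattern, written here in C++ with illustrative names (not code from this repo):

```cpp
#include <functional>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>

// Hypothetical sketch of what @Model.register(...) does in the converter:
// several architecture strings map to one model class via a shared registry.
struct ModelBase { virtual ~ModelBase() = default; };
struct LlamaModel : ModelBase {};

using Factory = std::function<std::unique_ptr<ModelBase>()>;

static std::map<std::string, Factory> & registry() {
    static std::map<std::string, Factory> r;
    return r;
}

template <typename T, typename... Names>
void register_model(Names... names) {
    // Register the same factory under every given architecture name.
    (registry().emplace(names, [] { return std::make_unique<T>(); }), ...);
}

std::unique_ptr<ModelBase> create(const std::string & arch) {
    auto it = registry().find(arch);
    if (it == registry().end()) throw std::runtime_error("unknown arch: " + arch);
    return it->second();
}

int main() {
    // Both the old and the new spelling resolve to the same class.
    register_model<LlamaModel>("LLaMAForCausalLM", "LlamaForCausalLM");
    auto m = create("LLaMAForCausalLM");
}
```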
examples/server/public/loading.html (new file, 12 lines)

@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta http-equiv="refresh" content="5">
+</head>
+<body>
+    <div id="loading">
+        The model is loading. Please wait.<br/>
+        The user interface will appear soon.
+    </div>
+</body>
+</html>
@@ -29,6 +29,7 @@
 #include "system-prompts.js.hpp"
 #include "prompt-formats.js.hpp"
 #include "json-schema-to-grammar.mjs.hpp"
+#include "loading.html.hpp"

 #include <atomic>
 #include <chrono>
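The new include is a build-generated header that embeds loading.html as a byte array; the middleware hunk below reads it through `loading_html` and `loading_html_len`. Assuming it is produced the same `xxd -i` way as the server's other embedded assets, its shape is roughly:

```cpp
// Assumed shape of the generated header (the real one is emitted at build
// time by something like `xxd -i loading.html`); the bytes shown are the
// start of "<!DOCTYPE".
unsigned char loading_html[] = {
    0x3c, 0x21, 0x44, 0x4f, 0x43, 0x54, 0x59, 0x50, 0x45, /* ... */
};
unsigned int loading_html_len = sizeof(loading_html);
```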
@@ -2593,10 +2594,16 @@ int main(int argc, char ** argv) {
         return false;
     };

-    auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) {
+    auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
         server_state current_state = state.load();
         if (current_state == SERVER_STATE_LOADING_MODEL) {
-            res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+            auto tmp = string_split(req.path, '.');
+            if (req.path == "/" || tmp.back() == "html") {
+                res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
+                res.status = 503;
+            } else {
+                res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
+            }
             return false;
         }
         return true;
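This middleware runs before routing and returning false short-circuits the request: while the model loads, browser-facing paths ("/" or anything ending in ".html") get the embedded splash page with a 503 and its 5-second meta refresh, while API clients keep getting the JSON "Loading model" error. A standalone sketch of the same gating idea with cpp-httplib, simplified and not the server's actual wiring:

```cpp
#include <atomic>
#include <string>
#include "httplib.h"  // cpp-httplib, single-header

// While the model loads, HTML-ish requests get a splash page and every
// other request gets a 503 JSON error; once loaded, requests fall through.
static std::atomic<bool> model_loading{true};

static const char splash[] =
    "<html><head><meta http-equiv=\"refresh\" content=\"5\"></head>"
    "<body>The model is loading. Please wait.</body></html>";

bool middleware_server_state(const httplib::Request & req, httplib::Response & res) {
    if (!model_loading.load()) {
        return true;  // fall through to the real handlers
    }
    const bool wants_html = req.path == "/" ||
        (req.path.size() > 5 && req.path.rfind(".html") == req.path.size() - 5);
    if (wants_html) {
        res.set_content(splash, "text/html; charset=utf-8");
    } else {
        res.set_content("{\"error\":\"Loading model\"}", "application/json");
    }
    res.status = 503;
    return false;  // short-circuit: do not route further
}
```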
@@ -2987,6 +2994,8 @@ int main(int argc, char ** argv) {
            }, [&](json error_data) {
                server_sent_event(sink, "error", error_data);
            });
+           static const std::string ev_done = "data: [DONE]\n\n";
+           sink.write(ev_done.data(), ev_done.size());
            sink.done();
            return true;
        };
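The added lines close the event stream the way the OpenAI API does: after the completion and error callbacks finish, the server emits a literal `data: [DONE]` event so streaming clients know no further chunks follow (the test change below checks for exactly this sentinel). A minimal sketch of the framing, independent of the server code:

```cpp
#include <iostream>
#include <string>

// OpenAI-style SSE framing: each JSON chunk is one "data:" event, and a
// literal [DONE] sentinel terminates the stream.
std::string sse_event(const std::string & payload) {
    return "data: " + payload + "\n\n";  // the blank line ends an SSE event
}

int main() {
    std::cout << sse_event("{\"choices\":[{\"delta\":{\"content\":\"Hi\"}}]}");
    std::cout << sse_event("[DONE]");  // clients stop reading here
}
```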
@@ -1020,6 +1020,8 @@ async def oai_chat_completions(user_prompt,
                     event_data = line.split(': ', 1)
                     assert event_data[0] == 'data', f'Bad event code received: ```{event_data}```'
                     chunk_raw = event_data[1]
+                    if chunk_raw == '[DONE]':
+                        break

                     chunk = json.loads(chunk_raw)
                     assert len(chunk['choices']) == 1, f"no choices provided, line ```{line}```"
@@ -3419,7 +3419,7 @@ double ggml_type_sizef(enum ggml_type type) {
 }

 GGML_CALL const char * ggml_type_name(enum ggml_type type) {
-    return type_traits[type].type_name;
+    return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
 }

 GGML_CALL bool ggml_is_quantized(enum ggml_type type) {
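`ggml_type_name` can be called with a type id that came from a file or from user input, so indexing `type_traits` unchecked would read out of bounds on a corrupt value; the guard returns "NONE" instead. The same defensive-lookup pattern in isolation, with an illustrative table rather than ggml's:

```cpp
#include <cstdio>

// Reject anything outside the table before indexing it; the extra >= 0
// check is for a plain int argument (ggml's enum is assumed non-negative).
enum my_type { TYPE_F32, TYPE_F16, TYPE_COUNT };
static const char * type_names[TYPE_COUNT] = { "f32", "f16" };

const char * type_name(int type) {
    return (type >= 0 && type < TYPE_COUNT) ? type_names[type] : "NONE";
}

int main() {
    std::printf("%s %s\n", type_name(1), type_name(42));  // prints "f16 NONE"
}
```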
@@ -15903,7 +15903,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_ubatch & batch) {

        // clear unused states
        for (int i = 0; i < n_kv; ++i) {
-           uint32_t cell_id = i + kv_self.head;
+           const uint32_t cell_id = i + kv_self.head;
            llama_kv_cell & kv_cell = lctx.kv_self.cells[cell_id];

            data[i] = (float) (kv_cell.src >= 0);