Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	examples/server/README.md
This commit is contained in:
Concedo 2024-11-06 23:13:00 +08:00
commit 628dcd640e
10 changed files with 1111 additions and 741 deletions

View file

@ -9182,7 +9182,7 @@ static bool llm_load_tensors(
// print memory requirements per buffer type
for (auto & buf : model.bufs) {
LLAMA_LOG_INFO("%s: %10s model buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf.get()), ggml_backend_buffer_get_size(buf.get()) / 1024.0 / 1024.0);
LLAMA_LOG_INFO("%s: %12s model buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf.get()), ggml_backend_buffer_get_size(buf.get()) / 1024.0 / 1024.0);
}
// populate tensors_by_name
@ -21893,8 +21893,11 @@ static int32_t llama_chat_apply_template_internal(
// IBM Granite template
for (const auto & message : chat) {
std::string role(message->role);
ss << "<|start_of_role|>" << role << "<|end_of_role|>"
<< message->content << "<|end_of_text|>\n";
ss << "<|start_of_role|>" << role << "<|end_of_role|>";
if (role == "assistant_tool_call") {
ss << "<|tool_call|>";
}
ss << message->content << "<|end_of_text|>\n";
}
if (add_ass) {
ss << "<|start_of_role|>assistant<|end_of_role|>\n";