Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-11 09:34:37 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.devops/llama-cli-intel.Dockerfile
#	.devops/llama-server-intel.Dockerfile
#	README.md
#	ggml/src/CMakeLists.txt
#	tests/test-chat-template.cpp
Commit cca2fa9a6c
4 changed files with 11 additions and 2 deletions
@@ -2724,7 +2724,7 @@ std::string llama_chat_format_single(const struct llama_model * model,
         const llama_chat_msg & new_msg,
         bool add_ass) {
     std::ostringstream ss;
-    auto fmt_past_msg = llama_chat_apply_template(model, tmpl, past_msg, false);
+    auto fmt_past_msg = past_msg.empty() ? "" : llama_chat_apply_template(model, tmpl, past_msg, false);
     std::vector<llama_chat_msg> chat_new(past_msg);
     // if the past_msg ends with a newline, we must preserve it in the formatted version
     if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
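The one-line change above matters because llama_chat_format_single builds the single-message delta by formatting the history with and without the new message and returning only the trailing part; if an empty history were still pushed through the template, some templates would emit a prefix for it and the delta would start at the wrong offset. Below is a minimal, self-contained sketch of that delta idea using a toy ChatML-style template; the names here (msg, apply_template, format_single) are illustrative mocks, not the real llama.cpp API.

// Hypothetical mock of the delta-formatting idea behind llama_chat_format_single.
#include <iostream>
#include <string>
#include <vector>

struct msg { std::string role, content; };

// Toy ChatML-style template. A real template may emit a prefix even for an
// empty conversation, which is exactly why the patched code skips templating it.
static std::string apply_template(const std::vector<msg> & chat, bool add_ass) {
    std::string out;
    for (const auto & m : chat) {
        out += "<|im_start|>" + m.role + "\n" + m.content + "<|im_end|>\n";
    }
    if (add_ass) {
        out += "<|im_start|>assistant\n";
    }
    return out;
}

static std::string format_single(const std::vector<msg> & past, const msg & new_msg, bool add_ass) {
    // Mirror of the patched guard: do not template an empty history at all.
    const std::string fmt_past = past.empty() ? "" : apply_template(past, false);
    std::vector<msg> chat_new(past);
    chat_new.push_back(new_msg);
    const std::string fmt_new = apply_template(chat_new, add_ass);
    // The delta is whatever the new message appended after the old formatting.
    return fmt_new.substr(fmt_past.size());
}

int main() {
    std::vector<msg> history;
    std::cout << format_single(history, {"user", "hello"}, true);
    history.push_back({"user", "hello"});
    history.push_back({"assistant", "hi there"});
    std::cout << format_single(history, {"user", "how are you?"}, true);
    return 0;
}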
@@ -125,6 +125,7 @@ static std::string chat_add_and_format(struct llama_model * model, std::vector<l
     auto formatted = llama_chat_format_single(
             model, g_params->chat_template, chat_msgs, new_msg, role == "user");
     chat_msgs.push_back({role, content});
+    LOG("formatted: %s\n", formatted.c_str());
     return formatted;
 }
 
@@ -529,12 +529,16 @@ extern "C" {
             struct llama_lora_adapter * adapter,
             float scale);
 
-    // Remove a LoRA adapter from given context
+    // Remove a specific LoRA adapter from given context
     // Return -1 if the adapter is not present in the context
     LLAMA_API int32_t llama_lora_adapter_remove(
             struct llama_context * ctx,
             struct llama_lora_adapter * adapter);
 
+    // Remove all LoRA adapters from given context
+    LLAMA_API void llama_lora_adapter_clear(
+            struct llama_context * ctx);
+
     // Manually free a LoRA adapter
     // Note: loaded adapters will be free when the associated model is deleted
     LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
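For context on how the new declaration is meant to be used: llama_lora_adapter_remove detaches one adapter and returns -1 when it is not attached, while the new llama_lora_adapter_clear detaches everything at once. The following is a minimal, self-contained mock of that bookkeeping; mock_context and mock_adapter are hypothetical stand-ins for the real llama structs, not the library API.

// Mock of the adapter-list semantics declared in the hunk above.
#include <algorithm>
#include <cassert>
#include <vector>

struct mock_adapter { int id; };

struct mock_context {
    std::vector<mock_adapter *> lora_adapters;
};

// Analogue of llama_lora_adapter_remove: return -1 if the adapter is not attached.
static int adapter_remove(mock_context & ctx, mock_adapter * adapter) {
    auto it = std::find(ctx.lora_adapters.begin(), ctx.lora_adapters.end(), adapter);
    if (it == ctx.lora_adapters.end()) {
        return -1;
    }
    ctx.lora_adapters.erase(it);
    return 0;
}

// Analogue of the new llama_lora_adapter_clear: detach all adapters at once.
static void adapter_clear(mock_context & ctx) {
    ctx.lora_adapters.clear();
}

int main() {
    mock_adapter a{1}, b{2};
    mock_context ctx;
    ctx.lora_adapters = {&a, &b};

    assert(adapter_remove(ctx, &a) == 0);   // attached, so removed
    assert(adapter_remove(ctx, &a) == -1);  // no longer attached
    adapter_clear(ctx);                     // drop the rest
    assert(ctx.lora_adapters.empty());
    return 0;
}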
@@ -16246,6 +16246,10 @@ int32_t llama_lora_adapter_remove(
     return -1;
 }
 
+void llama_lora_adapter_clear(struct llama_context * ctx) {
+    ctx->lora_adapters.clear();
+}
+
 void llama_lora_adapter_free(struct llama_lora_adapter * adapter) {
     delete adapter;
 }