Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	.github/workflows/release.yml
#	CMakeLists.txt
#	examples/simple-chat/simple-chat.cpp
#	src/llama-quant.cpp
#	tools/run/run.cpp
#	tools/server/README.md
commit ace537d44e
Author: Concedo
Date:   2025-06-24 23:06:16 +08:00

17 changed files with 554 additions and 212 deletions

@@ -293,6 +293,7 @@ int main(int argc, char ** argv) {
     if (!params.system_prompt.empty() || !params.prompt.empty()) {
         common_chat_templates_inputs inputs;
+        inputs.use_jinja = g_params->use_jinja;
         inputs.messages = chat_msgs;
         inputs.add_generation_prompt = !params.prompt.empty();
@@ -917,10 +918,19 @@ int main(int argc, char ** argv) {
             embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
             embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());

+            if (params.verbose_prompt) {
+                LOG_INF("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size() - original_size);
+            }
+
             for (size_t i = original_size; i < embd_inp.size(); ++i) {
                 const llama_token token = embd_inp[i];
+                const std::string token_str = common_token_to_piece(ctx, token);
                 output_tokens.push_back(token);
-                output_ss << common_token_to_piece(ctx, token);
+                output_ss << token_str;
+
+                if (params.verbose_prompt) {
+                    LOG_INF("%6d -> '%s'\n", token, token_str.c_str());
+                }
             }

             // reset assistant message
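
For reference, the core pattern in the second hunk is to convert each token to its text exactly once, caching it in token_str so the same string feeds both output_ss and the new --verbose-prompt log line. Below is a minimal standalone C++ sketch of that pattern; to_piece() and the token ids are hypothetical stand-ins, since the real common_token_to_piece() needs a live llama.cpp context.

#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

// Stand-in for common_token_to_piece(ctx, token): maps a token id to its text.
static std::string to_piece(int token) {
    return "<tok:" + std::to_string(token) + ">";
}

int main() {
    const bool verbose_prompt = true;                   // mirrors params.verbose_prompt
    const std::vector<int> embd_inp = {101, 202, 303};  // hypothetical token ids
    std::ostringstream output_ss;

    for (int token : embd_inp) {
        const std::string token_str = to_piece(token);  // convert once, reuse twice
        output_ss << token_str;
        if (verbose_prompt) {
            // same "%6d -> '%s'" format as the patched LOG_INF call
            std::printf("%6d -> '%s'\n", token, token_str.c_str());
        }
    }
    std::printf("accumulated: %s\n", output_ss.str().c_str());
    return 0;
}

Caching the piece avoids a second tokenizer lookup per token when verbose logging is enabled, which is the only extra work the patch adds to the interactive input path.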