Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/tools.sh
#	build-xcframework.sh
#	ci/run.sh
#	examples/Miku.sh
#	examples/chat-13B.sh
#	examples/chat-persistent.sh
#	examples/chat-vicuna.sh
#	examples/chat.sh
#	examples/jeopardy/jeopardy.sh
#	examples/reason-act.sh
#	examples/server-llama2-13B.sh
#	examples/sycl/build.sh
#	examples/sycl/run-llama2.sh
#	examples/sycl/run-llama3.sh
#	examples/ts-type-to-grammar.sh
#	ggml/src/ggml-cpu/CMakeLists.txt
#	ggml/src/ggml-sycl/element_wise.cpp
#	ggml/src/ggml-sycl/element_wise.hpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	scripts/apple/validate-apps.sh
#	scripts/apple/validate-ios.sh
#	scripts/apple/validate-macos.sh
#	scripts/apple/validate-tvos.sh
#	scripts/apple/validate-visionos.sh
#	scripts/check-requirements.sh
#	scripts/ci-run.sh
#	scripts/compare-commits.sh
#	scripts/debug-test.sh
#	scripts/gen-authors.sh
#	scripts/get-hellaswag.sh
#	scripts/get-pg.sh
#	scripts/get-wikitext-103.sh
#	scripts/get-wikitext-2.sh
#	scripts/get-winogrande.sh
#	scripts/hf.sh
#	scripts/qnt-all.sh
#	scripts/run-all-perf.sh
#	scripts/run-all-ppl.sh
#	scripts/sync-ggml-am.sh
#	scripts/sync-ggml.sh
#	scripts/tool_bench.sh
#	tests/test-backend-ops.cpp
#	tests/test-lora-conversion-inference.sh
#	tests/test-tokenizer-0.sh
#	tools/server/README.md
commit cdda9d16e0
Author: Concedo
Date:   2025-06-30 20:38:44 +08:00

42 changed files with 1519 additions and 118 deletions

@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 API_URL="${API_URL:-http://127.0.0.1:8080}"


@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 API_URL="${API_URL:-http://127.0.0.1:8080}"

Binary file not shown.


@@ -2110,6 +2110,7 @@ struct server_context {
                 /* use_jinja              */ params_base.use_jinja,
                 /* prefill_assistant      */ params_base.prefill_assistant,
                 /* reasoning_format       */ params_base.reasoning_format,
+                /* chat_template_kwargs   */ params_base.default_template_kwargs,
                 /* common_chat_templates  */ chat_templates.get(),
                 /* allow_image            */ mctx ? mtmd_support_vision(mctx) : false,
                 /* allow_audio            */ mctx ? mtmd_support_audio (mctx) : false,
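
The new chat_template_kwargs slot forwards server-side defaults (params_base.default_template_kwargs) into the OAI-compat parser options. A minimal launch sketch, assuming the defaults are exposed through a --chat-template-kwargs JSON flag; the flag name is an assumption, since this diff only shows the field plumbing:

    # hypothetical flag name; the diff only shows default_template_kwargs
    # being passed into the parser options
    ./llama-server -m model.gguf \
        --jinja \
        --chat-template-kwargs '{"enable_thinking": false}'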


@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # make sure we are in the right directory
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
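
The recurring shebang swap in these scripts replaces a hard-coded interpreter path with a PATH lookup, so they keep working on systems where bash does not live at /bin/bash (NixOS, or a newer Homebrew bash on macOS). The two forms can be compared directly:

    # the old form requires this exact path to exist:
    ls -l /bin/bash
    # the new form runs whichever bash is first on PATH:
    command -v bash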


@@ -579,6 +579,7 @@ struct oaicompat_parser_options {
     bool use_jinja;
     bool prefill_assistant;
     common_reasoning_format reasoning_format;
+    std::map<std::string,std::string> chat_template_kwargs;
     common_chat_templates * tmpls;
     bool allow_image;
     bool allow_audio;
@@ -756,6 +757,13 @@ static json oaicompat_chat_params_parse(
         llama_params["parse_tool_calls"] = true;
     }
 
+    // merge the template args provided from command line with the args provided in the user request
+    auto chat_template_kwargs_object = json_value(body, "chat_template_kwargs", json::object());
+    inputs.chat_template_kwargs = opt.chat_template_kwargs;
+    for (const auto & item : chat_template_kwargs_object.items()) {
+        inputs.chat_template_kwargs[item.key()] = item.value().dump();
+    }
+
     // if the assistant message appears at the end of list, we do not add end-of-turn token
     // for ex. this can be useful to modify the reasoning process in reasoning models
     bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" && opt.prefill_assistant;
@@ -771,6 +779,11 @@ static json oaicompat_chat_params_parse(
         /* TODO: test this properly */
         inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE;
 
+        if ( (!inputs.enable_thinking) || inputs.chat_template_kwargs.find("enable_thinking") != inputs.chat_template_kwargs.end()) {
+            throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking.");
+        }
+
         inputs.add_generation_prompt = true;
     }
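
Per-request template arguments now merge over the command-line defaults, with the request winning on key collisions since it is applied second; per the new runtime_error above, setting enable_thinking is also incompatible with assistant-message prefill. A hedged request sketch against the default endpoint shown earlier (the effect of enable_thinking depends on the model's Jinja template):

    # "chat_template_kwargs" in the request body comes straight from this diff;
    # whether the template honors enable_thinking is model-dependent
    curl http://127.0.0.1:8080/v1/chat/completions \
        -H 'Content-Type: application/json' \
        -d '{
              "messages": [{"role": "user", "content": "hello"}],
              "chat_template_kwargs": {"enable_thinking": false}
            }'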


@@ -231,7 +231,7 @@ function ConversationItem({
         >
           {conv.name}
         </button>
-        <div className="dropdown dropdown-end h-5">
+        <div tabIndex={0} className="dropdown dropdown-end h-5">
           <BtnWithTooltips
             // on mobile, we always show the ellipsis icon
             // on desktop, we only show it when the user hovers over the conversation item
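
Adding tabIndex={0} makes the wrapper <div> focusable, which is presumably the motivation here: daisyUI dropdowns open on focus, and a plain <div> never receives focus without an explicit tabindex, so the conversation menu could not open reliably before this change.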