Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/release.yml
#	.github/workflows/server.yml
#	.github/workflows/ui-build.yml
#	.github/workflows/ui-publish.yml
#	CMakeLists.txt
#	docs/autoparser.md
#	docs/backend/snapdragon/CMakeUserPresets.json
#	docs/backend/snapdragon/README.md
#	docs/backend/snapdragon/windows.md
#	docs/function-calling.md
#	examples/model-conversion/scripts/embedding/run-original-model.py
#	ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c
#	ggml/src/ggml-opencl/ggml-opencl.cpp
#	ggml/src/ggml-opencl/kernels/cvt.cl
#	ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemm_moe_q4_k_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemm_moe_q5_k_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemm_moe_q6_k_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemv_moe_q4_k_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemv_moe_q5_k_f32_ns.cl
#	ggml/src/ggml-opencl/kernels/gemv_moe_q6_k_f32_ns.cl
#	ggml/src/ggml-sycl/common.hpp
#	ggml/src/ggml-sycl/dmmv.cpp
#	ggml/src/ggml-sycl/gated_delta_net.cpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-vulkan/CMakeLists.txt
#	ggml/src/ggml-zendnn/CMakeLists.txt
#	ggml/src/ggml-zendnn/ggml-zendnn.cpp
#	requirements/requirements-convert_hf_to_gguf.txt
#	scripts/snapdragon/windows/setup-build.ps1
#	tools/perplexity/perplexity.cpp
This commit is contained in:
Concedo 2026-05-24 13:55:44 +08:00
commit 8ca4283f55
17 changed files with 323 additions and 203 deletions

View file

@ -219,6 +219,7 @@ struct common_chat_parser_params {
bool reasoning_in_content = false;
std::string generation_prompt;
bool parse_tool_calls = true;
bool is_continuation = false;
bool echo = false; // Include assistant prefilled msg in output
bool debug = false; // Enable debug output for PEG parser
common_peg_arena parser = {};

View file

@ -618,11 +618,7 @@ struct common_params {
std::map<std::string, std::string> default_template_kwargs;
// UI configs
#ifdef LLAMA_UI_DEFAULT_ENABLED
bool ui = LLAMA_UI_DEFAULT_ENABLED != 0;
#else
bool ui = true; // default to enabled when not set
#endif
bool ui = true;
// Deprecated: use ui, ui_mcp_proxy, ui_config_json instead
bool webui = ui;

View file

@ -306,7 +306,7 @@ void ggml_backend_tensor_get_2d_async(ggml_backend_t backend, const struct ggml_
GGML_ASSERT(tensor);
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
if (n_copies <= 1 || backend->iface.set_tensor_2d_async == NULL) {
if (n_copies <= 1 || backend->iface.get_tensor_2d_async == NULL) {
for (size_t i = 0; i < n_copies; i++) {
ggml_backend_tensor_get_async(backend, tensor, (char *) data + i*stride_data, offset + i*stride_tensor, size);
}
@ -317,7 +317,7 @@ void ggml_backend_tensor_get_2d_async(ggml_backend_t backend, const struct ggml_
}
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
backend->iface.get_tensor_2d_async(backend, tensor, data, offset, size, n_copies, stride_tensor, stride_data);
}

View file

@ -1465,6 +1465,12 @@ bool llama_model_base::load_tensors(llama_model_loader & ml) {
if (!layer.ssm_beta_s && layer.ssm_beta) {
layer.ssm_beta_s = create_tensor(tn(LLM_TENSOR_SSM_BETA, "scale", i), {1}, TENSOR_NOT_REQUIRED);
}
if (!layer.nextn.eh_proj_s && layer.nextn.eh_proj) {
layer.nextn.eh_proj_s = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "scale", i), {1}, TENSOR_NOT_REQUIRED);
}
if (!layer.nextn.shared_head_head_s && layer.nextn.shared_head_head) {
layer.nextn.shared_head_head_s = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "scale", i), {1}, TENSOR_NOT_REQUIRED);
}
// input scales
if (!layer.wq_in_s && layer.wq) {
@ -1524,6 +1530,12 @@ bool llama_model_base::load_tensors(llama_model_loader & ml) {
if (!layer.ssm_beta_in_s && layer.ssm_beta) {
layer.ssm_beta_in_s = create_tensor(tn(LLM_TENSOR_SSM_BETA, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
}
if (!layer.nextn.eh_proj_in_s && layer.nextn.eh_proj) {
layer.nextn.eh_proj_in_s = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
}
if (!layer.nextn.shared_head_head_in_s && layer.nextn.shared_head_head) {
layer.nextn.shared_head_head_in_s = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
}
}
// output scales
if (output && output->type == GGML_TYPE_NVFP4) {

View file

@ -202,12 +202,16 @@ struct llama_layer_shortconv {
};
struct llama_layer_nextn {
struct ggml_tensor * eh_proj = nullptr;
struct ggml_tensor * embed_tokens = nullptr;
struct ggml_tensor * enorm = nullptr;
struct ggml_tensor * hnorm = nullptr;
struct ggml_tensor * shared_head_head = nullptr;
struct ggml_tensor * shared_head_norm = nullptr;
struct ggml_tensor * eh_proj = nullptr;
struct ggml_tensor * eh_proj_s = nullptr;
struct ggml_tensor * eh_proj_in_s = nullptr;
struct ggml_tensor * embed_tokens = nullptr;
struct ggml_tensor * enorm = nullptr;
struct ggml_tensor * hnorm = nullptr;
struct ggml_tensor * shared_head_head = nullptr;
struct ggml_tensor * shared_head_head_s = nullptr;
struct ggml_tensor * shared_head_head_in_s = nullptr;
struct ggml_tensor * shared_head_norm = nullptr;
};
struct llama_layer {

View file

@ -538,7 +538,7 @@ llama_model_qwen35::graph_mtp::graph_mtp(const llama_model & model, const llm_gr
ggml_tensor * concat = ggml_concat(ctx0, e_norm, h_norm, /*dim=*/ 0);
cb(concat, "mtp_concat", il);
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat);
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat, layer.nextn.eh_proj_s);
cb(cur, "mtp_eh_proj", il);
ggml_tensor * inpSA = cur;
@ -626,8 +626,9 @@ llama_model_qwen35::graph_mtp::graph_mtp(const llama_model & model, const llm_gr
cb(cur, "mtp_shared_head_norm", -1);
ggml_tensor * head_w = layer.nextn.shared_head_head ? layer.nextn.shared_head_head : model.output;
ggml_tensor * head_s = layer.nextn.shared_head_head ? layer.nextn.shared_head_head_s : model.output_s;
GGML_ASSERT(head_w && "QWEN35 MTP: missing LM head (nextn.shared_head_head or model.output)");
cur = build_lora_mm(head_w, cur);
cur = build_lora_mm(head_w, cur, head_s);
cb(cur, "result_output", -1);
res->t_logits = cur;

View file

@ -602,7 +602,7 @@ llama_model_qwen35moe::graph_mtp::graph_mtp(const llama_model & model, const llm
ggml_tensor * concat = ggml_concat(ctx0, e_norm, h_norm, /*dim=*/ 0);
cb(concat, "mtp_concat", il);
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat);
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat, layer.nextn.eh_proj_s);
cb(cur, "mtp_eh_proj", il);
ggml_tensor * inpSA = cur;
@ -722,8 +722,9 @@ llama_model_qwen35moe::graph_mtp::graph_mtp(const llama_model & model, const llm
cb(cur, "mtp_shared_head_norm", -1);
ggml_tensor * head_w = layer.nextn.shared_head_head ? layer.nextn.shared_head_head : model.output;
ggml_tensor * head_s = layer.nextn.shared_head_head ? layer.nextn.shared_head_head_s : model.output_s;
GGML_ASSERT(head_w && "QWEN35MOE MTP: missing LM head (nextn.shared_head_head or model.output)");
cur = build_lora_mm(head_w, cur);
cur = build_lora_mm(head_w, cur, head_s);
cb(cur, "result_output", -1);
res->t_logits = cur;

View file

@ -1,5 +1,12 @@
-r ../../requirements/requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu
pillow~=11.3.0
torch~=2.6.0
torchvision~=0.21.0
## Embedding Gemma requires PyTorch 2.6.0 or later, bumped to 2.11.0 for compatibility
torch==2.11.0; platform_machine != "s390x" # check_requirements: ignore "=="
torchvision==0.26.0; platform_machine != "s390x" # check_requirements: ignore "=="
# torch s390x packages can only be found from nightly builds
--extra-index-url https://download.pytorch.org/whl/nightly
torch>=0.0.0.dev0; platform_machine == "s390x" # check_requirements: ignore "=="
torchvision>=0.0.0.dev0; platform_machine == "s390x" # check_requirements: ignore "=="

View file

@ -231,16 +231,19 @@ bool server_http_context::init(const common_params & params) {
};
auto middleware_server_state = [this](const httplib::Request & req, httplib::Response & res) {
(void)req; // suppress unused parameter warning when LLAMA_BUILD_UI is not defined
bool ready = is_ready.load();
if (!ready) {
#if defined(LLAMA_BUILD_UI)
#if defined(LLAMA_UI_HAS_ASSETS)
auto tmp = string_split<std::string>(req.path, '.');
if (req.path == "/" || (tmp.size() > 0 && tmp.back() == "html")) {
res.status = 503;
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
return false;
if (const llama_ui_asset * a = llama_ui_find_asset("loading.html")) {
res.status = 503;
res.set_content(reinterpret_cast<const char*>(a->data), a->size, "text/html; charset=utf-8");
return false;
}
}
#else
(void)req;
#endif
// no endpoints are allowed to be accessed when the server is not ready
// this is to prevent any data races or inconsistent states
@ -312,23 +315,27 @@ bool server_http_context::init(const common_params & params) {
return 1;
}
} else {
#if defined(LLAMA_BUILD_UI)
// using embedded static index.html
srv->Get(params.api_prefix + "/", [](const httplib::Request & /*req*/, httplib::Response & res) {
// COEP and COOP headers, required by pyodide (python interpreter)
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
res.set_content(reinterpret_cast<const char*>(index_html), index_html_len, "text/html; charset=utf-8");
return false;
});
srv->Get(params.api_prefix + "/bundle.js", [](const httplib::Request & /*req*/, httplib::Response & res) {
res.set_content(reinterpret_cast<const char*>(bundle_js), bundle_js_len, "application/javascript; charset=utf-8");
return false;
});
srv->Get(params.api_prefix + "/bundle.css", [](const httplib::Request & /*req*/, httplib::Response & res) {
res.set_content(reinterpret_cast<const char*>(bundle_css), bundle_css_len, "text/css; charset=utf-8");
return false;
});
#if defined(LLAMA_UI_HAS_ASSETS)
auto serve_asset = [](const std::string & name, const char * mime, bool with_isolation_headers) {
return [name, mime, with_isolation_headers](const httplib::Request & /*req*/, httplib::Response & res) {
const llama_ui_asset * a = llama_ui_find_asset(name.c_str());
if (!a) {
res.status = 404;
return false;
}
if (with_isolation_headers) {
// COEP and COOP headers, required by pyodide (python interpreter)
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
}
res.set_content(reinterpret_cast<const char*>(a->data), a->size, mime);
return false;
};
};
srv->Get(params.api_prefix + "/", serve_asset("index.html", "text/html; charset=utf-8", true));
srv->Get(params.api_prefix + "/bundle.js", serve_asset("bundle.js", "application/javascript; charset=utf-8", false));
srv->Get(params.api_prefix + "/bundle.css", serve_asset("bundle.css", "text/css; charset=utf-8", false));
#endif
}
}

View file

@ -149,7 +149,7 @@ task_result_state::task_result_state(const common_chat_parser_params & chat_pars
, oai_resp_id("resp_" + random_string())
, oai_resp_reasoning_id("rs_" + random_string())
, oai_resp_message_id("msg_" + random_string()) {
if (!chat_parser_params.echo) {
if (chat_parser_params.is_continuation && !chat_parser_params.echo) {
// initialize chat_msg to avoid emitting a delta containing the assistant prefill
chat_msg = common_chat_parse("", true, chat_parser_params);
}
@ -432,6 +432,10 @@ task_params server_task::params_from_json_cmpl(
if (data.contains("chat_parser")) {
params.chat_parser_params.parser.load(data.at("chat_parser").get<std::string>());
}
if (data.contains("continue_final_message")) {
auto continuation = common_chat_continuation_parse(data.at("continue_final_message"));
params.chat_parser_params.is_continuation = continuation != COMMON_CHAT_CONTINUATION_NONE;
}
params.chat_parser_params.echo = json_value(data, "echo", false);
}

View file

@ -1,150 +1,98 @@
set(TARGET llama-ui)
# Deprecated: use LLAMA_UI_HF_BUCKET instead
set(LLAMA_WEBUI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt webui assets (deprecated: use LLAMA_UI_HF_BUCKET)")
set(LLAMA_UI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt UI assets")
# Backward compat: forward old var to new one
if(DEFINED LLAMA_WEBUI_HF_BUCKET AND NOT DEFINED LLAMA_UI_HF_BUCKET)
if(DEFINED LLAMA_BUILD_WEBUI)
set(LLAMA_BUILD_UI ${LLAMA_BUILD_WEBUI})
message(DEPRECATION "LLAMA_BUILD_WEBUI is deprecated, use LLAMA_BUILD_UI instead")
endif()
if(DEFINED LLAMA_USE_PREBUILT_WEBUI)
set(LLAMA_USE_PREBUILT_UI ${LLAMA_USE_PREBUILT_WEBUI})
message(DEPRECATION "LLAMA_USE_PREBUILT_WEBUI is deprecated, use LLAMA_USE_PREBUILT_UI instead")
endif()
if(DEFINED LLAMA_WEBUI_HF_BUCKET)
set(LLAMA_UI_HF_BUCKET ${LLAMA_WEBUI_HF_BUCKET})
elseif(DEFINED LLAMA_WEBUI_HF_BUCKET AND NOT "${LLAMA_WEBUI_HF_BUCKET}" STREQUAL "${LLAMA_UI_HF_BUCKET}")
message(DEPRECATION "LLAMA_WEBUI_HF_BUCKET is deprecated, use LLAMA_UI_HF_BUCKET instead")
endif()
set(TARGET_SRCS "")
set(UI_COMPILE_DEFS "")
if(LLAMA_BUILD_UI)
set(PUBLIC_ASSETS
index.html
bundle.js
bundle.css
loading.html
)
# Determine source of UI assets (priority: local > HF Bucket)
set(UI_SOURCE "")
set(UI_SOURCE_DIR "")
# Priority 1: Check for local build output
set(LOCAL_UI_DIR "${PROJECT_SOURCE_DIR}/build/tools/ui/dist")
# Verify all required assets exist before declaring local source valid
set(ALL_ASSETS_PRESENT TRUE)
foreach(asset ${PUBLIC_ASSETS})
if(NOT EXISTS "${LOCAL_UI_DIR}/${asset}")
set(ALL_ASSETS_PRESENT FALSE)
break()
endif()
endforeach()
if(ALL_ASSETS_PRESENT)
set(UI_SOURCE "local")
set(UI_SOURCE_DIR "${LOCAL_UI_DIR}")
message(STATUS "UI: using local build from ${UI_SOURCE_DIR}")
endif()
# Priority 2: Build-time asset provisioning (npm build → HF Bucket fallback)
if(NOT UI_SOURCE_DIR)
# Environment variable takes precedence (e.g., from CI workflows)
# Deprecated: use HF_UI_VERSION instead
if(DEFINED ENV{HF_WEBUI_VERSION})
set(HF_UI_VERSION "$ENV{HF_WEBUI_VERSION}")
message(DEPRECATION "HF_WEBUI_VERSION env var is deprecated, use HF_UI_VERSION instead")
if(NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
message(FATAL_ERROR "UI: invalid HF_WEBUI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
endif()
elseif(DEFINED ENV{HF_UI_VERSION})
set(HF_UI_VERSION "$ENV{HF_UI_VERSION}")
if(NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
message(FATAL_ERROR "UI: invalid HF_UI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
endif()
elseif(DEFINED LLAMA_BUILD_NUMBER)
set(HF_UI_VERSION "b${LLAMA_BUILD_NUMBER}")
message(STATUS "UI: derived HF_UI_VERSION=b${LLAMA_BUILD_NUMBER}")
else()
set(HF_UI_VERSION "")
message(STATUS "UI: version not specified (will use HF 'latest')")
endif()
if("${HF_UI_VERSION}" STREQUAL "")
set(UI_VERSION_TAG "provisioned")
else()
set(UI_VERSION_TAG "${HF_UI_VERSION}")
endif()
set(UI_STAMP "${CMAKE_CURRENT_BINARY_DIR}/.ui-${UI_VERSION_TAG}.stamp")
string(REPLACE ";" "+" PUBLIC_ASSETS_JOINED "${PUBLIC_ASSETS}")
add_custom_command(
OUTPUT ${UI_STAMP}
COMMAND ${CMAKE_COMMAND}
"-DSOURCE_DIR=${PROJECT_SOURCE_DIR}"
"-DPUBLIC_DIR=${PROJECT_SOURCE_DIR}/build/tools/ui/dist"
"-DHF_BUCKET=${LLAMA_UI_HF_BUCKET}"
"-DHF_VERSION=${HF_UI_VERSION}"
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_UI}"
"-DASSETS=${PUBLIC_ASSETS_JOINED}"
"-DSTAMP_FILE=${UI_STAMP}"
"-DNPM_DIR=${PROJECT_SOURCE_DIR}/tools/ui"
-P ${PROJECT_SOURCE_DIR}/scripts/ui-download.cmake
COMMENT "Building/provisioning UI assets (npm build -> HF Bucket fallback)"
)
set(UI_SOURCE "provisioned")
set(UI_SOURCE_DIR "${PROJECT_SOURCE_DIR}/build/tools/ui/dist")
endif()
# Process assets from the determined source
if(UI_SOURCE_DIR)
foreach(asset ${PUBLIC_ASSETS})
set(input "${UI_SOURCE_DIR}/${asset}")
set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
list(APPEND TARGET_SRCS ${output})
if(UI_SOURCE STREQUAL "local")
if(NOT EXISTS "${input}")
message(FATAL_ERROR "UI asset not found: ${input}")
endif()
set(dependency "${input}")
else()
set(dependency "${UI_STAMP}")
endif()
add_custom_command(
DEPENDS ${dependency}
OUTPUT "${output}"
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
)
set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
endforeach()
list(APPEND UI_COMPILE_DEFS
LLAMA_BUILD_UI
LLAMA_UI_DEFAULT_ENABLED=1
)
message(STATUS "UI: embedded with source: ${UI_SOURCE}")
else()
message(WARNING "UI: no source available. Neither local build (build/tools/ui/dist/) nor HF Bucket download succeeded.")
message(WARNING "UI: building server without embedded UI. Set LLAMA_BUILD_UI=OFF to suppress this warning.")
list(APPEND UI_COMPILE_DEFS LLAMA_UI_DEFAULT_ENABLED=0)
endif()
# Resolve HF asset version: explicit env var > derived from build number > unset
if(DEFINED ENV{HF_WEBUI_VERSION})
set(HF_UI_VERSION "$ENV{HF_WEBUI_VERSION}")
message(DEPRECATION "HF_WEBUI_VERSION env var is deprecated, use HF_UI_VERSION instead")
elseif(DEFINED ENV{HF_UI_VERSION})
set(HF_UI_VERSION "$ENV{HF_UI_VERSION}")
elseif(DEFINED LLAMA_BUILD_NUMBER)
set(HF_UI_VERSION "b${LLAMA_BUILD_NUMBER}")
else()
list(APPEND UI_COMPILE_DEFS LLAMA_UI_DEFAULT_ENABLED=0)
set(HF_UI_VERSION "")
endif()
# Build the static library
add_library(${TARGET} STATIC ui.cpp)
if(NOT "${HF_UI_VERSION}" STREQUAL "" AND NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
message(FATAL_ERROR "UI: invalid HF_UI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
endif()
target_include_directories(${TARGET} PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
set(UI_CPP "${CMAKE_CURRENT_BINARY_DIR}/ui.cpp")
set(UI_H "${CMAKE_CURRENT_BINARY_DIR}/ui.h")
if(CMAKE_CROSSCOMPILING)
find_program(HOST_CXX_COMPILER NAMES g++ clang++ NO_CMAKE_FIND_ROOT_PATH)
if(NOT HOST_CXX_COMPILER)
message(FATAL_ERROR "UI: no host C++ compiler (g++/clang++) found to build llama-ui-embed; set -DHOST_CXX_COMPILER=<path>")
endif()
message(STATUS "UI: building llama-ui-embed with host compiler ${HOST_CXX_COMPILER}")
if(CMAKE_HOST_WIN32)
set(LLAMA_UI_EMBED_EXE "${CMAKE_CURRENT_BINARY_DIR}/llama-ui-embed.exe")
else()
set(LLAMA_UI_EMBED_EXE "${CMAKE_CURRENT_BINARY_DIR}/llama-ui-embed")
endif()
add_custom_command(
OUTPUT "${LLAMA_UI_EMBED_EXE}"
COMMAND "${HOST_CXX_COMPILER}" -O2 -std=c++17
-o "${LLAMA_UI_EMBED_EXE}" "${CMAKE_CURRENT_SOURCE_DIR}/embed.cpp"
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/embed.cpp"
COMMENT "Building llama-ui-embed (host)"
VERBATIM
)
add_custom_target(llama-ui-embed DEPENDS "${LLAMA_UI_EMBED_EXE}")
else()
add_executable(llama-ui-embed embed.cpp)
target_compile_features(llama-ui-embed PRIVATE cxx_std_17)
set_target_properties(llama-ui-embed PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
)
set(LLAMA_UI_EMBED_EXE "$<TARGET_FILE:llama-ui-embed>")
endif()
# Run the provisioning script every build so source changes in tools/ui/ are
# always picked up. The script uses copy_if_different for ui.cpp/ui.h, so the
# library only recompiles when contents actually change.
add_custom_target(llama-ui-assets ALL
BYPRODUCTS ${UI_CPP} ${UI_H}
COMMAND ${CMAKE_COMMAND}
"-DUI_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}"
"-DUI_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}"
"-DLLAMA_SOURCE_DIR=${PROJECT_SOURCE_DIR}"
"-DHF_BUCKET=${LLAMA_UI_HF_BUCKET}"
"-DHF_VERSION=${HF_UI_VERSION}"
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_UI}"
"-DBUILD_UI=${LLAMA_BUILD_UI}"
"-DLLAMA_UI_EMBED=${LLAMA_UI_EMBED_EXE}"
-P "${PROJECT_SOURCE_DIR}/scripts/ui-assets.cmake"
COMMENT "Provisioning UI assets"
VERBATIM
)
target_compile_definitions(${TARGET} PUBLIC ${UI_COMPILE_DEFS})
add_dependencies(llama-ui-assets llama-ui-embed)
if(TARGET_SRCS)
# List generated .hpp files as sources so CMake tracks them as build dependencies
target_sources(${TARGET} PRIVATE ${TARGET_SRCS})
set_source_files_properties(${TARGET_SRCS} PROPERTIES HEADER_FILE_ONLY TRUE)
endif()
set_source_files_properties(${UI_CPP} ${UI_H} PROPERTIES GENERATED TRUE)
add_library(${TARGET} STATIC ${UI_CPP} ${UI_H})
target_compile_features(${TARGET} PRIVATE cxx_std_17)
add_dependencies(${TARGET} llama-ui-assets)
target_include_directories(${TARGET} PUBLIC
${CMAKE_CURRENT_BINARY_DIR}
)

144
tools/ui/embed.cpp Normal file
View file

@ -0,0 +1,144 @@
// llama-ui-embed: generate ui.cpp / ui.h that embed UI assets as C arrays.
//
// Usage:
// llama-ui-embed <out_cpp> <out_h> [<asset_name> <asset_path>]...
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <fstream>
#include <string>
#include <vector>
// Load the whole file at `path` into `out` as raw bytes.
//
// Returns true on success. Returns false when the file cannot be opened (a
// message is printed to stderr), when its size cannot be determined, or when
// the read comes up short. `out` is resized to the file size before reading.
static bool read_file(const std::string & path, std::vector<unsigned char> & out) {
    // Open positioned at end-of-file so the initial offset doubles as the size.
    std::ifstream in(path, std::ios::binary | std::ios::ate);
    if (!in) {
        fprintf(stderr, "embed: cannot open %s\n", path.c_str());
        return false;
    }

    const auto n_bytes = in.tellg();
    if (n_bytes < 0) {
        return false;
    }

    in.seekg(0);
    out.resize(static_cast<size_t>(n_bytes));

    // Nothing to read for an empty file.
    if (n_bytes == 0) {
        return true;
    }
    return static_cast<bool>(in.read(reinterpret_cast<char *>(out.data()), n_bytes));
}
// Append every byte of `bytes` to `out` as a lowercase C hex literal followed
// by a comma ("0x1f,"), i.e. exactly five characters per byte.
static void append_bytes_hex(std::string & out, const std::vector<unsigned char> & bytes) {
    static const char digits[] = "0123456789abcdef";

    // Five output characters per input byte.
    out.reserve(out.size() + bytes.size() * 5);

    for (size_t i = 0; i < bytes.size(); ++i) {
        const unsigned char v = bytes[i];
        const char token[5] = { '0', 'x', digits[v >> 4], digits[v & 0xf], ',' };
        out.append(token, sizeof(token));
    }
}
// Write `content` to `path` unless the file already holds exactly those bytes.
//
// Returns true when the file is already up to date or was written
// successfully; returns false (after printing to stderr) when the file cannot
// be opened for writing, or when the stream is not good after writing.
static bool write_if_different(const std::string & path, const std::string & content) {
    // Compare against the current contents; a size mismatch settles it without reading.
    std::ifstream in(path, std::ios::binary | std::ios::ate);
    if (in) {
        const auto cur_size = in.tellg();
        const bool same_size = cur_size >= 0 && static_cast<size_t>(cur_size) == content.size();
        if (same_size) {
            std::string current(static_cast<size_t>(cur_size), '\0');
            in.seekg(0);
            const bool loaded = cur_size == 0 || in.read(current.data(), cur_size);
            if (loaded && current == content) {
                return true;
            }
        }
    }

    std::ofstream dst(path, std::ios::binary | std::ios::trunc);
    if (!dst) {
        fprintf(stderr, "embed: cannot write %s\n", path.c_str());
        return false;
    }

    if (content.size() > 0) {
        dst.write(content.data(), static_cast<std::streamsize>(content.size()));
    }
    return dst.good();
}
// printf-style formatting into a std::string.
//
// `pattern` and the variadic arguments follow the usual printf rules. Returns
// the formatted text, or an empty string when formatting fails or produces no
// output.
//
// The output is measured first and then formatted into a buffer of exactly the
// required size. vsnprintf reports the length the output *would* have had, so
// using that value with a fixed-size buffer would read past the buffer
// whenever the formatted text did not fit.
static std::string fmt(const char * pattern, ...) {
    va_list ap;
    va_start(ap, pattern);

    // A va_list cannot be traversed twice; keep a copy for the second pass.
    va_list ap_copy;
    va_copy(ap_copy, ap);

    // First pass: compute the required length (excluding the terminating NUL).
    const int n = vsnprintf(nullptr, 0, pattern, ap);
    va_end(ap);

    if (n <= 0) {
        va_end(ap_copy);
        return std::string();
    }

    // Second pass: format into a buffer with room for the terminating NUL.
    std::vector<char> buf(static_cast<size_t>(n) + 1);
    const int written = vsnprintf(buf.data(), buf.size(), pattern, ap_copy);
    va_end(ap_copy);

    if (written != n) {
        return std::string();
    }
    return std::string(buf.data(), static_cast<size_t>(n));
}
// Escape `s` so it can be placed between double quotes in generated C++
// source: backslashes and double quotes get a leading backslash.
static std::string escape_c_literal(const char * s) {
    std::string out;
    for (; *s != '\0'; ++s) {
        if (*s == '"' || *s == '\\') {
            out += '\\';
        }
        out += *s;
    }
    return out;
}

// Entry point: llama-ui-embed <out_cpp> <out_h> [<asset_name> <asset_path>]...
//
// Generates a header declaring `llama_ui_asset` / `llama_ui_find_asset` and a
// source file embedding each asset as a byte array plus a lookup table keyed
// by asset name. With zero assets the header omits LLAMA_UI_HAS_ASSETS and the
// lookup function always returns nullptr.
//
// Returns 0 on success and 1 on a usage error, an unreadable asset, or a
// failed write of either output file.
int main(int argc, char ** argv) {
    // Two output paths are mandatory; assets must come in <name> <path> pairs.
    if (argc < 3 || ((argc - 3) % 2) != 0) {
        fprintf(stderr, "usage: %s <out_cpp> <out_h> [<name> <path>]...\n", argv[0]);
        return 1;
    }

    const std::string out_cpp = argv[1];
    const std::string out_h   = argv[2];
    const int n_assets = (argc - 3) / 2;

    std::string h;
    h += "#pragma once\n\n#include <stddef.h>\n\n";
    if (n_assets > 0) {
        h += "#define LLAMA_UI_HAS_ASSETS 1\n\n";
    }
    h +=
        "struct llama_ui_asset {\n"
        " const char * name;\n"
        " const unsigned char * data;\n"
        " size_t size;\n"
        "};\n\n"
        "const llama_ui_asset * llama_ui_find_asset(const char * name);\n";

    std::string cpp;
    cpp += "#include \"ui.h\"\n\n#include <string.h>\n\n";

    if (n_assets > 0) {
        for (int i = 0; i < n_assets; i++) {
            const char * path = argv[3 + i * 2 + 1];
            std::vector<unsigned char> bytes;
            if (!read_file(path, bytes)) {
                return 1;
            }
            cpp += fmt("static const unsigned char asset_%d_data[] = {", i);
            if (bytes.empty()) {
                // `T a[] = {};` is ill-formed in ISO C++ (empty initializer for an
                // array of unknown bound), so emit one placeholder byte. The size
                // recorded below stays 0, so consumers never read it.
                cpp += "0x00,";
            } else {
                append_bytes_hex(cpp, bytes);
            }
            cpp += fmt("};\nstatic const size_t asset_%d_size = %lu;\n\n",
                       i, static_cast<unsigned long>(bytes.size()));
        }

        cpp += "static const llama_ui_asset g_assets[] = {\n";
        for (int i = 0; i < n_assets; i++) {
            const char * name = argv[3 + i * 2];
            // The name is appended directly (escaped) instead of being routed
            // through a format string, so neither its length nor quote/backslash
            // characters can corrupt the generated string literal.
            cpp += " { \"";
            cpp += escape_c_literal(name);
            cpp += fmt("\", asset_%d_data, asset_%d_size },\n", i, i);
        }
        cpp += "};\n\n";

        cpp +=
            "const llama_ui_asset * llama_ui_find_asset(const char * name) {\n"
            " for (const auto & a : g_assets) {\n"
            " if (strcmp(a.name, name) == 0) {\n"
            " return &a;\n"
            " }\n"
            " }\n"
            " return nullptr;\n"
            "}\n";
    } else {
        cpp +=
            "const llama_ui_asset * llama_ui_find_asset(const char *) {\n"
            " return nullptr;\n"
            "}\n";
    }

    // Attempt both writes even if the first one fails, then report the combined result.
    bool ok = true;
    ok = write_if_different(out_h, h) && ok;
    ok = write_if_different(out_cpp, cpp) && ok;
    return ok ? 0 : 1;
}

View file

@ -19,7 +19,7 @@ const GUIDE_FOR_FRONTEND = `
-->
`.trim();
const OUTPUT_DIR = '../../build/tools/ui/dist';
const OUTPUT_DIR = process.env.LLAMA_UI_OUT_DIR ?? './dist';
export function llamaCppBuildPlugin(): Plugin {
return {

15
tools/ui/sources.cmake Normal file
View file

@ -0,0 +1,15 @@
# Inputs used to decide whether the npm build output is up-to-date.
# Glob patterns; every file they match counts as an npm build input.
# NOTE(review): presumably resolved relative to tools/ui/ by the consuming script — confirm.
set(UI_SOURCE_GLOBS
src/*
static/*
)
# Individually listed files that also count as npm build inputs
# (package manifests, build/tooling configuration, and the Vite build plugin script).
set(UI_SOURCE_FILES
package.json
package-lock.json
vite.config.ts
svelte.config.js
tsconfig.json
scripts/vite-plugin-llama-cpp-build.ts
)

View file

@ -2,6 +2,10 @@ import { mdsvex } from 'mdsvex';
import adapter from '@sveltejs/adapter-static';
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte';
// CMake sets LLAMA_UI_OUT_DIR to the staging dir under the build tree; a manual
// `npm run build` without that env var falls back to ./dist.
const outDir = process.env.LLAMA_UI_OUT_DIR ?? './dist';
/** @type {import('@sveltejs/kit').Config} */
const config = {
// Consult https://svelte.dev/docs/kit/integrations
@ -15,8 +19,8 @@ const config = {
},
router: { type: 'hash' },
adapter: adapter({
pages: '../../build/tools/ui/dist',
assets: '../../build/tools/ui/dist',
pages: outDir,
assets: outDir,
fallback: 'index.html',
precompress: false,
strict: true

View file

@ -1,7 +0,0 @@
#ifdef LLAMA_BUILD_UI
// auto generated files (see README.md for details)
#include "index.html.hpp"
#include "bundle.js.hpp"
#include "bundle.css.hpp"
#include "loading.html.hpp"
#endif

View file

@ -1,17 +0,0 @@
#pragma once
// TODO @ngxson : refactor, wrap these in a function
#ifdef LLAMA_BUILD_UI
extern unsigned char index_html[];
extern unsigned int index_html_len;
extern unsigned char bundle_js[];
extern unsigned int bundle_js_len;
extern unsigned char bundle_css[];
extern unsigned int bundle_css_len;
extern unsigned char loading_html[];
extern unsigned int loading_html_len;
#endif