mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-25 14:53:39 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .github/workflows/release.yml # .github/workflows/server.yml # .github/workflows/ui-build.yml # .github/workflows/ui-publish.yml # CMakeLists.txt # docs/autoparser.md # docs/backend/snapdragon/CMakeUserPresets.json # docs/backend/snapdragon/README.md # docs/backend/snapdragon/windows.md # docs/function-calling.md # examples/model-conversion/scripts/embedding/run-original-model.py # ggml/src/ggml-hexagon/htp/hmx-flash-attn-ops.c # ggml/src/ggml-opencl/ggml-opencl.cpp # ggml/src/ggml-opencl/kernels/cvt.cl # ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemm_moe_q4_0_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemm_moe_q4_1_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemm_moe_q4_k_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemm_moe_q5_0_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemm_moe_q5_1_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemm_moe_q5_k_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemm_moe_q6_k_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemv_moe_mxfp4_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemv_moe_q4_0_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemv_moe_q4_1_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemv_moe_q4_k_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemv_moe_q5_0_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemv_moe_q5_1_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemv_moe_q5_k_f32_ns.cl # ggml/src/ggml-opencl/kernels/gemv_moe_q6_k_f32_ns.cl # ggml/src/ggml-sycl/common.hpp # ggml/src/ggml-sycl/dmmv.cpp # ggml/src/ggml-sycl/gated_delta_net.cpp # ggml/src/ggml-sycl/ggml-sycl.cpp # ggml/src/ggml-vulkan/CMakeLists.txt # ggml/src/ggml-zendnn/CMakeLists.txt # ggml/src/ggml-zendnn/ggml-zendnn.cpp # requirements/requirements-convert_hf_to_gguf.txt # scripts/snapdragon/windows/setup-build.ps1 # tools/perplexity/perplexity.cpp
This commit is contained in:
commit
8ca4283f55
17 changed files with 323 additions and 203 deletions
|
|
@ -219,6 +219,7 @@ struct common_chat_parser_params {
|
|||
bool reasoning_in_content = false;
|
||||
std::string generation_prompt;
|
||||
bool parse_tool_calls = true;
|
||||
bool is_continuation = false;
|
||||
bool echo = false; // Include assistant prefilled msg in output
|
||||
bool debug = false; // Enable debug output for PEG parser
|
||||
common_peg_arena parser = {};
|
||||
|
|
|
|||
|
|
@ -618,11 +618,7 @@ struct common_params {
|
|||
std::map<std::string, std::string> default_template_kwargs;
|
||||
|
||||
// UI configs
|
||||
#ifdef LLAMA_UI_DEFAULT_ENABLED
|
||||
bool ui = LLAMA_UI_DEFAULT_ENABLED != 0;
|
||||
#else
|
||||
bool ui = true; // default to enabled when not set
|
||||
#endif
|
||||
bool ui = true;
|
||||
|
||||
// Deprecated: use ui, ui_mcp_proxy, ui_config_json instead
|
||||
bool webui = ui;
|
||||
|
|
|
|||
|
|
@ -306,7 +306,7 @@ void ggml_backend_tensor_get_2d_async(ggml_backend_t backend, const struct ggml_
|
|||
GGML_ASSERT(tensor);
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
|
||||
if (n_copies <= 1 || backend->iface.set_tensor_2d_async == NULL) {
|
||||
if (n_copies <= 1 || backend->iface.get_tensor_2d_async == NULL) {
|
||||
for (size_t i = 0; i < n_copies; i++) {
|
||||
ggml_backend_tensor_get_async(backend, tensor, (char *) data + i*stride_data, offset + i*stride_tensor, size);
|
||||
}
|
||||
|
|
@ -317,7 +317,7 @@ void ggml_backend_tensor_get_2d_async(ggml_backend_t backend, const struct ggml_
|
|||
}
|
||||
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
|
||||
GGML_ASSERT(offset + (n_copies-1)*stride_tensor + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
|
||||
backend->iface.get_tensor_2d_async(backend, tensor, data, offset, size, n_copies, stride_tensor, stride_data);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1465,6 +1465,12 @@ bool llama_model_base::load_tensors(llama_model_loader & ml) {
|
|||
if (!layer.ssm_beta_s && layer.ssm_beta) {
|
||||
layer.ssm_beta_s = create_tensor(tn(LLM_TENSOR_SSM_BETA, "scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
if (!layer.nextn.eh_proj_s && layer.nextn.eh_proj) {
|
||||
layer.nextn.eh_proj_s = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
if (!layer.nextn.shared_head_head_s && layer.nextn.shared_head_head) {
|
||||
layer.nextn.shared_head_head_s = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
|
||||
// input scales
|
||||
if (!layer.wq_in_s && layer.wq) {
|
||||
|
|
@ -1524,6 +1530,12 @@ bool llama_model_base::load_tensors(llama_model_loader & ml) {
|
|||
if (!layer.ssm_beta_in_s && layer.ssm_beta) {
|
||||
layer.ssm_beta_in_s = create_tensor(tn(LLM_TENSOR_SSM_BETA, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
if (!layer.nextn.eh_proj_in_s && layer.nextn.eh_proj) {
|
||||
layer.nextn.eh_proj_in_s = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
if (!layer.nextn.shared_head_head_in_s && layer.nextn.shared_head_head) {
|
||||
layer.nextn.shared_head_head_in_s = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "input_scale", i), {1}, TENSOR_NOT_REQUIRED);
|
||||
}
|
||||
}
|
||||
// output scales
|
||||
if (output && output->type == GGML_TYPE_NVFP4) {
|
||||
|
|
|
|||
|
|
@ -202,12 +202,16 @@ struct llama_layer_shortconv {
|
|||
};
|
||||
|
||||
struct llama_layer_nextn {
|
||||
struct ggml_tensor * eh_proj = nullptr;
|
||||
struct ggml_tensor * embed_tokens = nullptr;
|
||||
struct ggml_tensor * enorm = nullptr;
|
||||
struct ggml_tensor * hnorm = nullptr;
|
||||
struct ggml_tensor * shared_head_head = nullptr;
|
||||
struct ggml_tensor * shared_head_norm = nullptr;
|
||||
struct ggml_tensor * eh_proj = nullptr;
|
||||
struct ggml_tensor * eh_proj_s = nullptr;
|
||||
struct ggml_tensor * eh_proj_in_s = nullptr;
|
||||
struct ggml_tensor * embed_tokens = nullptr;
|
||||
struct ggml_tensor * enorm = nullptr;
|
||||
struct ggml_tensor * hnorm = nullptr;
|
||||
struct ggml_tensor * shared_head_head = nullptr;
|
||||
struct ggml_tensor * shared_head_head_s = nullptr;
|
||||
struct ggml_tensor * shared_head_head_in_s = nullptr;
|
||||
struct ggml_tensor * shared_head_norm = nullptr;
|
||||
};
|
||||
|
||||
struct llama_layer {
|
||||
|
|
|
|||
|
|
@ -538,7 +538,7 @@ llama_model_qwen35::graph_mtp::graph_mtp(const llama_model & model, const llm_gr
|
|||
ggml_tensor * concat = ggml_concat(ctx0, e_norm, h_norm, /*dim=*/ 0);
|
||||
cb(concat, "mtp_concat", il);
|
||||
|
||||
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat);
|
||||
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat, layer.nextn.eh_proj_s);
|
||||
cb(cur, "mtp_eh_proj", il);
|
||||
|
||||
ggml_tensor * inpSA = cur;
|
||||
|
|
@ -626,8 +626,9 @@ llama_model_qwen35::graph_mtp::graph_mtp(const llama_model & model, const llm_gr
|
|||
cb(cur, "mtp_shared_head_norm", -1);
|
||||
|
||||
ggml_tensor * head_w = layer.nextn.shared_head_head ? layer.nextn.shared_head_head : model.output;
|
||||
ggml_tensor * head_s = layer.nextn.shared_head_head ? layer.nextn.shared_head_head_s : model.output_s;
|
||||
GGML_ASSERT(head_w && "QWEN35 MTP: missing LM head (nextn.shared_head_head or model.output)");
|
||||
cur = build_lora_mm(head_w, cur);
|
||||
cur = build_lora_mm(head_w, cur, head_s);
|
||||
cb(cur, "result_output", -1);
|
||||
|
||||
res->t_logits = cur;
|
||||
|
|
|
|||
|
|
@ -602,7 +602,7 @@ llama_model_qwen35moe::graph_mtp::graph_mtp(const llama_model & model, const llm
|
|||
ggml_tensor * concat = ggml_concat(ctx0, e_norm, h_norm, /*dim=*/ 0);
|
||||
cb(concat, "mtp_concat", il);
|
||||
|
||||
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat);
|
||||
ggml_tensor * cur = build_lora_mm(layer.nextn.eh_proj, concat, layer.nextn.eh_proj_s);
|
||||
cb(cur, "mtp_eh_proj", il);
|
||||
|
||||
ggml_tensor * inpSA = cur;
|
||||
|
|
@ -722,8 +722,9 @@ llama_model_qwen35moe::graph_mtp::graph_mtp(const llama_model & model, const llm
|
|||
cb(cur, "mtp_shared_head_norm", -1);
|
||||
|
||||
ggml_tensor * head_w = layer.nextn.shared_head_head ? layer.nextn.shared_head_head : model.output;
|
||||
ggml_tensor * head_s = layer.nextn.shared_head_head ? layer.nextn.shared_head_head_s : model.output_s;
|
||||
GGML_ASSERT(head_w && "QWEN35MOE MTP: missing LM head (nextn.shared_head_head or model.output)");
|
||||
cur = build_lora_mm(head_w, cur);
|
||||
cur = build_lora_mm(head_w, cur, head_s);
|
||||
cb(cur, "result_output", -1);
|
||||
|
||||
res->t_logits = cur;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,12 @@
|
|||
-r ../../requirements/requirements-convert_legacy_llama.txt
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
pillow~=11.3.0
|
||||
torch~=2.6.0
|
||||
torchvision~=0.21.0
|
||||
|
||||
## Embedding Gemma requires PyTorch 2.6.0 or later, bumped to 2.11.0 for compatibility
|
||||
torch==2.11.0; platform_machine != "s390x" # check_requirements: ignore "=="
|
||||
torchvision==0.26.0; platform_machine != "s390x" # check_requirements: ignore "=="
|
||||
|
||||
# torch s390x packages can only be found from nightly builds
|
||||
--extra-index-url https://download.pytorch.org/whl/nightly
|
||||
torch>=0.0.0.dev0; platform_machine == "s390x" # check_requirements: ignore "=="
|
||||
torchvision>=0.0.0.dev0; platform_machine == "s390x" # check_requirements: ignore "=="
|
||||
|
|
|
|||
|
|
@ -231,16 +231,19 @@ bool server_http_context::init(const common_params & params) {
|
|||
};
|
||||
|
||||
auto middleware_server_state = [this](const httplib::Request & req, httplib::Response & res) {
|
||||
(void)req; // suppress unused parameter warning when LLAMA_BUILD_UI is not defined
|
||||
bool ready = is_ready.load();
|
||||
if (!ready) {
|
||||
#if defined(LLAMA_BUILD_UI)
|
||||
#if defined(LLAMA_UI_HAS_ASSETS)
|
||||
auto tmp = string_split<std::string>(req.path, '.');
|
||||
if (req.path == "/" || (tmp.size() > 0 && tmp.back() == "html")) {
|
||||
res.status = 503;
|
||||
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
|
||||
return false;
|
||||
if (const llama_ui_asset * a = llama_ui_find_asset("loading.html")) {
|
||||
res.status = 503;
|
||||
res.set_content(reinterpret_cast<const char*>(a->data), a->size, "text/html; charset=utf-8");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#else
|
||||
(void)req;
|
||||
#endif
|
||||
// no endpoints are allowed to be accessed when the server is not ready
|
||||
// this is to prevent any data races or inconsistent states
|
||||
|
|
@ -312,23 +315,27 @@ bool server_http_context::init(const common_params & params) {
|
|||
return 1;
|
||||
}
|
||||
} else {
|
||||
#if defined(LLAMA_BUILD_UI)
|
||||
// using embedded static index.html
|
||||
srv->Get(params.api_prefix + "/", [](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
// COEP and COOP headers, required by pyodide (python interpreter)
|
||||
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
|
||||
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
|
||||
res.set_content(reinterpret_cast<const char*>(index_html), index_html_len, "text/html; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
srv->Get(params.api_prefix + "/bundle.js", [](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
res.set_content(reinterpret_cast<const char*>(bundle_js), bundle_js_len, "application/javascript; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
srv->Get(params.api_prefix + "/bundle.css", [](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
res.set_content(reinterpret_cast<const char*>(bundle_css), bundle_css_len, "text/css; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
#if defined(LLAMA_UI_HAS_ASSETS)
|
||||
auto serve_asset = [](const std::string & name, const char * mime, bool with_isolation_headers) {
|
||||
return [name, mime, with_isolation_headers](const httplib::Request & /*req*/, httplib::Response & res) {
|
||||
const llama_ui_asset * a = llama_ui_find_asset(name.c_str());
|
||||
if (!a) {
|
||||
res.status = 404;
|
||||
return false;
|
||||
}
|
||||
if (with_isolation_headers) {
|
||||
// COEP and COOP headers, required by pyodide (python interpreter)
|
||||
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
|
||||
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
|
||||
}
|
||||
res.set_content(reinterpret_cast<const char*>(a->data), a->size, mime);
|
||||
return false;
|
||||
};
|
||||
};
|
||||
|
||||
srv->Get(params.api_prefix + "/", serve_asset("index.html", "text/html; charset=utf-8", true));
|
||||
srv->Get(params.api_prefix + "/bundle.js", serve_asset("bundle.js", "application/javascript; charset=utf-8", false));
|
||||
srv->Get(params.api_prefix + "/bundle.css", serve_asset("bundle.css", "text/css; charset=utf-8", false));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ task_result_state::task_result_state(const common_chat_parser_params & chat_pars
|
|||
, oai_resp_id("resp_" + random_string())
|
||||
, oai_resp_reasoning_id("rs_" + random_string())
|
||||
, oai_resp_message_id("msg_" + random_string()) {
|
||||
if (!chat_parser_params.echo) {
|
||||
if (chat_parser_params.is_continuation && !chat_parser_params.echo) {
|
||||
// initialize chat_msg to avoid emitting a delta containing the assistant prefill
|
||||
chat_msg = common_chat_parse("", true, chat_parser_params);
|
||||
}
|
||||
|
|
@ -432,6 +432,10 @@ task_params server_task::params_from_json_cmpl(
|
|||
if (data.contains("chat_parser")) {
|
||||
params.chat_parser_params.parser.load(data.at("chat_parser").get<std::string>());
|
||||
}
|
||||
if (data.contains("continue_final_message")) {
|
||||
auto continuation = common_chat_continuation_parse(data.at("continue_final_message"));
|
||||
params.chat_parser_params.is_continuation = continuation != COMMON_CHAT_CONTINUATION_NONE;
|
||||
}
|
||||
params.chat_parser_params.echo = json_value(data, "echo", false);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,150 +1,98 @@
|
|||
set(TARGET llama-ui)
|
||||
|
||||
# Deprecated: use LLAMA_UI_HF_BUCKET instead
|
||||
set(LLAMA_WEBUI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt webui assets (deprecated: use LLAMA_UI_HF_BUCKET)")
|
||||
set(LLAMA_UI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt UI assets")
|
||||
|
||||
# Backward compat: forward old var to new one
|
||||
if(DEFINED LLAMA_WEBUI_HF_BUCKET AND NOT DEFINED LLAMA_UI_HF_BUCKET)
|
||||
if(DEFINED LLAMA_BUILD_WEBUI)
|
||||
set(LLAMA_BUILD_UI ${LLAMA_BUILD_WEBUI})
|
||||
message(DEPRECATION "LLAMA_BUILD_WEBUI is deprecated, use LLAMA_BUILD_UI instead")
|
||||
endif()
|
||||
if(DEFINED LLAMA_USE_PREBUILT_WEBUI)
|
||||
set(LLAMA_USE_PREBUILT_UI ${LLAMA_USE_PREBUILT_WEBUI})
|
||||
message(DEPRECATION "LLAMA_USE_PREBUILT_WEBUI is deprecated, use LLAMA_USE_PREBUILT_UI instead")
|
||||
endif()
|
||||
if(DEFINED LLAMA_WEBUI_HF_BUCKET)
|
||||
set(LLAMA_UI_HF_BUCKET ${LLAMA_WEBUI_HF_BUCKET})
|
||||
elseif(DEFINED LLAMA_WEBUI_HF_BUCKET AND NOT "${LLAMA_WEBUI_HF_BUCKET}" STREQUAL "${LLAMA_UI_HF_BUCKET}")
|
||||
message(DEPRECATION "LLAMA_WEBUI_HF_BUCKET is deprecated, use LLAMA_UI_HF_BUCKET instead")
|
||||
endif()
|
||||
|
||||
set(TARGET_SRCS "")
|
||||
set(UI_COMPILE_DEFS "")
|
||||
|
||||
if(LLAMA_BUILD_UI)
|
||||
set(PUBLIC_ASSETS
|
||||
index.html
|
||||
bundle.js
|
||||
bundle.css
|
||||
loading.html
|
||||
)
|
||||
|
||||
# Determine source of UI assets (priority: local > HF Bucket)
|
||||
set(UI_SOURCE "")
|
||||
set(UI_SOURCE_DIR "")
|
||||
|
||||
# Priority 1: Check for local build output
|
||||
set(LOCAL_UI_DIR "${PROJECT_SOURCE_DIR}/build/tools/ui/dist")
|
||||
|
||||
# Verify all required assets exist before declaring local source valid
|
||||
set(ALL_ASSETS_PRESENT TRUE)
|
||||
foreach(asset ${PUBLIC_ASSETS})
|
||||
if(NOT EXISTS "${LOCAL_UI_DIR}/${asset}")
|
||||
set(ALL_ASSETS_PRESENT FALSE)
|
||||
break()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(ALL_ASSETS_PRESENT)
|
||||
set(UI_SOURCE "local")
|
||||
set(UI_SOURCE_DIR "${LOCAL_UI_DIR}")
|
||||
message(STATUS "UI: using local build from ${UI_SOURCE_DIR}")
|
||||
endif()
|
||||
|
||||
# Priority 2: Build-time asset provisioning (npm build → HF Bucket fallback)
|
||||
if(NOT UI_SOURCE_DIR)
|
||||
# Environment variable takes precedence (e.g., from CI workflows)
|
||||
# Deprecated: use HF_UI_VERSION instead
|
||||
if(DEFINED ENV{HF_WEBUI_VERSION})
|
||||
set(HF_UI_VERSION "$ENV{HF_WEBUI_VERSION}")
|
||||
message(DEPRECATION "HF_WEBUI_VERSION env var is deprecated, use HF_UI_VERSION instead")
|
||||
if(NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
|
||||
message(FATAL_ERROR "UI: invalid HF_WEBUI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
|
||||
endif()
|
||||
elseif(DEFINED ENV{HF_UI_VERSION})
|
||||
set(HF_UI_VERSION "$ENV{HF_UI_VERSION}")
|
||||
if(NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
|
||||
message(FATAL_ERROR "UI: invalid HF_UI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
|
||||
endif()
|
||||
elseif(DEFINED LLAMA_BUILD_NUMBER)
|
||||
set(HF_UI_VERSION "b${LLAMA_BUILD_NUMBER}")
|
||||
message(STATUS "UI: derived HF_UI_VERSION=b${LLAMA_BUILD_NUMBER}")
|
||||
else()
|
||||
set(HF_UI_VERSION "")
|
||||
message(STATUS "UI: version not specified (will use HF 'latest')")
|
||||
endif()
|
||||
|
||||
if("${HF_UI_VERSION}" STREQUAL "")
|
||||
set(UI_VERSION_TAG "provisioned")
|
||||
else()
|
||||
set(UI_VERSION_TAG "${HF_UI_VERSION}")
|
||||
endif()
|
||||
set(UI_STAMP "${CMAKE_CURRENT_BINARY_DIR}/.ui-${UI_VERSION_TAG}.stamp")
|
||||
|
||||
string(REPLACE ";" "+" PUBLIC_ASSETS_JOINED "${PUBLIC_ASSETS}")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${UI_STAMP}
|
||||
COMMAND ${CMAKE_COMMAND}
|
||||
"-DSOURCE_DIR=${PROJECT_SOURCE_DIR}"
|
||||
"-DPUBLIC_DIR=${PROJECT_SOURCE_DIR}/build/tools/ui/dist"
|
||||
"-DHF_BUCKET=${LLAMA_UI_HF_BUCKET}"
|
||||
"-DHF_VERSION=${HF_UI_VERSION}"
|
||||
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_UI}"
|
||||
"-DASSETS=${PUBLIC_ASSETS_JOINED}"
|
||||
"-DSTAMP_FILE=${UI_STAMP}"
|
||||
"-DNPM_DIR=${PROJECT_SOURCE_DIR}/tools/ui"
|
||||
-P ${PROJECT_SOURCE_DIR}/scripts/ui-download.cmake
|
||||
COMMENT "Building/provisioning UI assets (npm build -> HF Bucket fallback)"
|
||||
)
|
||||
|
||||
set(UI_SOURCE "provisioned")
|
||||
set(UI_SOURCE_DIR "${PROJECT_SOURCE_DIR}/build/tools/ui/dist")
|
||||
endif()
|
||||
|
||||
# Process assets from the determined source
|
||||
if(UI_SOURCE_DIR)
|
||||
foreach(asset ${PUBLIC_ASSETS})
|
||||
set(input "${UI_SOURCE_DIR}/${asset}")
|
||||
set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
|
||||
list(APPEND TARGET_SRCS ${output})
|
||||
|
||||
if(UI_SOURCE STREQUAL "local")
|
||||
if(NOT EXISTS "${input}")
|
||||
message(FATAL_ERROR "UI asset not found: ${input}")
|
||||
endif()
|
||||
set(dependency "${input}")
|
||||
else()
|
||||
set(dependency "${UI_STAMP}")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
DEPENDS ${dependency}
|
||||
OUTPUT "${output}"
|
||||
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
|
||||
)
|
||||
set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
|
||||
endforeach()
|
||||
|
||||
list(APPEND UI_COMPILE_DEFS
|
||||
LLAMA_BUILD_UI
|
||||
LLAMA_UI_DEFAULT_ENABLED=1
|
||||
)
|
||||
message(STATUS "UI: embedded with source: ${UI_SOURCE}")
|
||||
else()
|
||||
message(WARNING "UI: no source available. Neither local build (build/tools/ui/dist/) nor HF Bucket download succeeded.")
|
||||
message(WARNING "UI: building server without embedded UI. Set LLAMA_BUILD_UI=OFF to suppress this warning.")
|
||||
list(APPEND UI_COMPILE_DEFS LLAMA_UI_DEFAULT_ENABLED=0)
|
||||
endif()
|
||||
# Resolve HF asset version: explicit env var > derived from build number > unset
|
||||
if(DEFINED ENV{HF_WEBUI_VERSION})
|
||||
set(HF_UI_VERSION "$ENV{HF_WEBUI_VERSION}")
|
||||
message(DEPRECATION "HF_WEBUI_VERSION env var is deprecated, use HF_UI_VERSION instead")
|
||||
elseif(DEFINED ENV{HF_UI_VERSION})
|
||||
set(HF_UI_VERSION "$ENV{HF_UI_VERSION}")
|
||||
elseif(DEFINED LLAMA_BUILD_NUMBER)
|
||||
set(HF_UI_VERSION "b${LLAMA_BUILD_NUMBER}")
|
||||
else()
|
||||
list(APPEND UI_COMPILE_DEFS LLAMA_UI_DEFAULT_ENABLED=0)
|
||||
set(HF_UI_VERSION "")
|
||||
endif()
|
||||
|
||||
# Build the static library
|
||||
add_library(${TARGET} STATIC ui.cpp)
|
||||
if(NOT "${HF_UI_VERSION}" STREQUAL "" AND NOT HF_UI_VERSION MATCHES "^[A-Za-z0-9._-]+$")
|
||||
message(FATAL_ERROR "UI: invalid HF_UI_VERSION='${HF_UI_VERSION}' - must match ^[A-Za-z0-9._-]+$")
|
||||
endif()
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
set(UI_CPP "${CMAKE_CURRENT_BINARY_DIR}/ui.cpp")
|
||||
set(UI_H "${CMAKE_CURRENT_BINARY_DIR}/ui.h")
|
||||
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
find_program(HOST_CXX_COMPILER NAMES g++ clang++ NO_CMAKE_FIND_ROOT_PATH)
|
||||
if(NOT HOST_CXX_COMPILER)
|
||||
message(FATAL_ERROR "UI: no host C++ compiler (g++/clang++) found to build llama-ui-embed; set -DHOST_CXX_COMPILER=<path>")
|
||||
endif()
|
||||
message(STATUS "UI: building llama-ui-embed with host compiler ${HOST_CXX_COMPILER}")
|
||||
|
||||
if(CMAKE_HOST_WIN32)
|
||||
set(LLAMA_UI_EMBED_EXE "${CMAKE_CURRENT_BINARY_DIR}/llama-ui-embed.exe")
|
||||
else()
|
||||
set(LLAMA_UI_EMBED_EXE "${CMAKE_CURRENT_BINARY_DIR}/llama-ui-embed")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${LLAMA_UI_EMBED_EXE}"
|
||||
COMMAND "${HOST_CXX_COMPILER}" -O2 -std=c++17
|
||||
-o "${LLAMA_UI_EMBED_EXE}" "${CMAKE_CURRENT_SOURCE_DIR}/embed.cpp"
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/embed.cpp"
|
||||
COMMENT "Building llama-ui-embed (host)"
|
||||
VERBATIM
|
||||
)
|
||||
add_custom_target(llama-ui-embed DEPENDS "${LLAMA_UI_EMBED_EXE}")
|
||||
else()
|
||||
add_executable(llama-ui-embed embed.cpp)
|
||||
target_compile_features(llama-ui-embed PRIVATE cxx_std_17)
|
||||
set_target_properties(llama-ui-embed PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
)
|
||||
set(LLAMA_UI_EMBED_EXE "$<TARGET_FILE:llama-ui-embed>")
|
||||
endif()
|
||||
|
||||
# Run the provisioning script every build so source changes in tools/ui/ are
|
||||
# always picked up. The script uses copy_if_different for ui.cpp/ui.h, so the
|
||||
# library only recompiles when contents actually change.
|
||||
add_custom_target(llama-ui-assets ALL
|
||||
BYPRODUCTS ${UI_CPP} ${UI_H}
|
||||
COMMAND ${CMAKE_COMMAND}
|
||||
"-DUI_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
"-DUI_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}"
|
||||
"-DLLAMA_SOURCE_DIR=${PROJECT_SOURCE_DIR}"
|
||||
"-DHF_BUCKET=${LLAMA_UI_HF_BUCKET}"
|
||||
"-DHF_VERSION=${HF_UI_VERSION}"
|
||||
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_UI}"
|
||||
"-DBUILD_UI=${LLAMA_BUILD_UI}"
|
||||
"-DLLAMA_UI_EMBED=${LLAMA_UI_EMBED_EXE}"
|
||||
-P "${PROJECT_SOURCE_DIR}/scripts/ui-assets.cmake"
|
||||
COMMENT "Provisioning UI assets"
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
target_compile_definitions(${TARGET} PUBLIC ${UI_COMPILE_DEFS})
|
||||
add_dependencies(llama-ui-assets llama-ui-embed)
|
||||
|
||||
if(TARGET_SRCS)
|
||||
# List generated .hpp files as sources so CMake tracks them as build dependencies
|
||||
target_sources(${TARGET} PRIVATE ${TARGET_SRCS})
|
||||
set_source_files_properties(${TARGET_SRCS} PROPERTIES HEADER_FILE_ONLY TRUE)
|
||||
endif()
|
||||
set_source_files_properties(${UI_CPP} ${UI_H} PROPERTIES GENERATED TRUE)
|
||||
|
||||
add_library(${TARGET} STATIC ${UI_CPP} ${UI_H})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||
add_dependencies(${TARGET} llama-ui-assets)
|
||||
|
||||
target_include_directories(${TARGET} PUBLIC
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
)
|
||||
|
|
|
|||
144
tools/ui/embed.cpp
Normal file
144
tools/ui/embed.cpp
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
// llama-ui-embed: generate ui.cpp / ui.h that embed UI assets as C arrays.
|
||||
//
|
||||
// Usage:
|
||||
// llama-ui-embed <out_cpp> <out_h> [<asset_name> <asset_path>]...
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Reads the entire file at `path` (binary mode) into `out`.
//
// Returns true on success; `out` then holds exactly the file's bytes (empty
// for a zero-length file). Returns false if the file cannot be opened, its
// size cannot be determined, or the read comes up short. Every failure path
// prints a diagnostic to stderr so a failing build step is never silent, and
// a failed read leaves `out` empty rather than partially filled.
static bool read_file(const std::string & path, std::vector<unsigned char> & out) {
    // open positioned at the end so tellg() reports the file size
    std::ifstream f(path, std::ios::binary | std::ios::ate);
    if (!f) {
        fprintf(stderr, "embed: cannot open %s\n", path.c_str());
        return false;
    }
    const std::streamoff sz = f.tellg();
    if (sz < 0) {
        fprintf(stderr, "embed: cannot determine size of %s\n", path.c_str());
        return false;
    }
    f.seekg(0);
    out.resize(static_cast<size_t>(sz));
    // skip the read for empty files: there is nothing to transfer
    if (sz > 0 && !f.read(reinterpret_cast<char *>(out.data()), sz)) {
        fprintf(stderr, "embed: cannot read %s\n", path.c_str());
        out.clear();
        return false;
    }
    return true;
}
|
||||
|
||||
// Appends each byte of `bytes` to `out` as a lowercase hex literal followed by
// a comma (e.g. 0x1f becomes "0x1f,"), suitable for a C array initializer.
static void append_bytes_hex(std::string & out, const std::vector<unsigned char> & bytes) {
    static const char digits[] = "0123456789abcdef";
    // every byte expands to exactly five characters: '0' 'x' <hi> <lo> ','
    out.reserve(out.size() + bytes.size() * 5);
    for (size_t i = 0; i < bytes.size(); ++i) {
        const unsigned char value = bytes[i];
        const char token[5] = { '0', 'x', digits[value >> 4], digits[value & 0xf], ',' };
        out.append(token, sizeof(token));
    }
}
|
||||
|
||||
// Writes `content` to `path` unless the file already holds exactly those bytes.
//
// Returns true when the file was already identical (and is left untouched) or
// when it was written successfully; returns false, after printing a message to
// stderr, if the file cannot be opened for writing, or if the write fails.
static bool write_if_different(const std::string & path, const std::string & content) {
    std::ifstream current(path, std::ios::binary | std::ios::ate);

    bool unchanged = false;
    if (current) {
        // opened at the end, so tellg() is the current size on disk
        const std::streamoff len = current.tellg();
        const bool same_size = len >= 0 && static_cast<size_t>(len) == content.size();
        if (same_size) {
            std::string on_disk(static_cast<size_t>(len), '\0');
            current.seekg(0);
            const bool loaded = (len == 0) || static_cast<bool>(current.read(on_disk.data(), len));
            unchanged = loaded && on_disk == content;
        }
    }
    if (unchanged) {
        return true;
    }

    std::ofstream sink(path, std::ios::binary | std::ios::trunc);
    if (!sink) {
        fprintf(stderr, "embed: cannot write %s\n", path.c_str());
        return false;
    }
    if (!content.empty()) {
        sink.write(content.data(), static_cast<std::streamsize>(content.size()));
    }
    return sink.good();
}
|
||||
|
||||
// printf-style formatting into a std::string.
//
// Formats into a 512-byte stack buffer first. vsnprintf reports the length the
// full output would need, which can exceed the buffer; in that case the result
// is re-formatted into a heap buffer of the exact size instead of reading past
// the end of the stack buffer. Returns an empty string if formatting fails or
// produces no output.
static std::string fmt(const char * pattern, ...) {
    char tmp[512];

    va_list ap;
    va_start(ap, pattern);
    // the first vsnprintf consumes `ap`, so keep a copy for a possible second pass
    va_list ap_retry;
    va_copy(ap_retry, ap);
    const int n = vsnprintf(tmp, sizeof(tmp), pattern, ap);
    va_end(ap);

    std::string result;
    if (n > 0) {
        const size_t len = static_cast<size_t>(n);
        if (len < sizeof(tmp)) {
            result.assign(tmp, len);
        } else {
            // output was truncated: retry with room for `len` chars plus the terminator
            std::vector<char> big(len + 1);
            const int m = vsnprintf(big.data(), big.size(), pattern, ap_retry);
            if (m > 0) {
                const size_t written = static_cast<size_t>(m);
                result.assign(big.data(), written < len ? written : len);
            }
        }
    }
    va_end(ap_retry);
    return result;
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
if (argc < 3 || ((argc - 3) % 2) != 0) {
|
||||
fprintf(stderr, "usage: %s <out_cpp> <out_h> [<name> <path>]...\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const std::string out_cpp = argv[1];
|
||||
const std::string out_h = argv[2];
|
||||
const int n_assets = (argc - 3) / 2;
|
||||
|
||||
std::string h;
|
||||
h += "#pragma once\n\n#include <stddef.h>\n\n";
|
||||
if (n_assets > 0) {
|
||||
h += "#define LLAMA_UI_HAS_ASSETS 1\n\n";
|
||||
}
|
||||
h +=
|
||||
"struct llama_ui_asset {\n"
|
||||
" const char * name;\n"
|
||||
" const unsigned char * data;\n"
|
||||
" size_t size;\n"
|
||||
"};\n\n"
|
||||
"const llama_ui_asset * llama_ui_find_asset(const char * name);\n";
|
||||
|
||||
std::string cpp;
|
||||
cpp += "#include \"ui.h\"\n\n#include <string.h>\n\n";
|
||||
|
||||
if (n_assets > 0) {
|
||||
for (int i = 0; i < n_assets; i++) {
|
||||
const char * path = argv[3 + i * 2 + 1];
|
||||
std::vector<unsigned char> bytes;
|
||||
if (!read_file(path, bytes)) {
|
||||
return 1;
|
||||
}
|
||||
cpp += fmt("static const unsigned char asset_%d_data[] = {", i);
|
||||
append_bytes_hex(cpp, bytes);
|
||||
cpp += fmt("};\nstatic const size_t asset_%d_size = %lu;\n\n",
|
||||
i, static_cast<unsigned long>(bytes.size()));
|
||||
}
|
||||
|
||||
cpp += "static const llama_ui_asset g_assets[] = {\n";
|
||||
for (int i = 0; i < n_assets; i++) {
|
||||
const char * name = argv[3 + i * 2];
|
||||
cpp += fmt(" { \"%s\", asset_%d_data, asset_%d_size },\n", name, i, i);
|
||||
}
|
||||
cpp += "};\n\n";
|
||||
|
||||
cpp +=
|
||||
"const llama_ui_asset * llama_ui_find_asset(const char * name) {\n"
|
||||
" for (const auto & a : g_assets) {\n"
|
||||
" if (strcmp(a.name, name) == 0) {\n"
|
||||
" return &a;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" return nullptr;\n"
|
||||
"}\n";
|
||||
} else {
|
||||
cpp +=
|
||||
"const llama_ui_asset * llama_ui_find_asset(const char *) {\n"
|
||||
" return nullptr;\n"
|
||||
"}\n";
|
||||
}
|
||||
|
||||
bool ok = true;
|
||||
ok = write_if_different(out_h, h) && ok;
|
||||
ok = write_if_different(out_cpp, cpp) && ok;
|
||||
return ok ? 0 : 1;
|
||||
}
|
||||
|
|
@ -19,7 +19,7 @@ const GUIDE_FOR_FRONTEND = `
|
|||
-->
|
||||
`.trim();
|
||||
|
||||
const OUTPUT_DIR = '../../build/tools/ui/dist';
|
||||
const OUTPUT_DIR = process.env.LLAMA_UI_OUT_DIR ?? './dist';
|
||||
|
||||
export function llamaCppBuildPlugin(): Plugin {
|
||||
return {
|
||||
|
|
|
|||
15
tools/ui/sources.cmake
Normal file
15
tools/ui/sources.cmake
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# Inputs used to decide whether the npm build output is up-to-date.
|
||||
|
||||
set(UI_SOURCE_GLOBS
|
||||
src/*
|
||||
static/*
|
||||
)
|
||||
|
||||
set(UI_SOURCE_FILES
|
||||
package.json
|
||||
package-lock.json
|
||||
vite.config.ts
|
||||
svelte.config.js
|
||||
tsconfig.json
|
||||
scripts/vite-plugin-llama-cpp-build.ts
|
||||
)
|
||||
|
|
@ -2,6 +2,10 @@ import { mdsvex } from 'mdsvex';
|
|||
import adapter from '@sveltejs/adapter-static';
|
||||
import { vitePreprocess } from '@sveltejs/vite-plugin-svelte';
|
||||
|
||||
// CMake sets LLAMA_UI_OUT_DIR to the staging dir under the build tree; manual
|
||||
// `npm run build` runs without the env var default to ./dist.
|
||||
const outDir = process.env.LLAMA_UI_OUT_DIR ?? './dist';
|
||||
|
||||
/** @type {import('@sveltejs/kit').Config} */
|
||||
const config = {
|
||||
// Consult https://svelte.dev/docs/kit/integrations
|
||||
|
|
@ -15,8 +19,8 @@ const config = {
|
|||
},
|
||||
router: { type: 'hash' },
|
||||
adapter: adapter({
|
||||
pages: '../../build/tools/ui/dist',
|
||||
assets: '../../build/tools/ui/dist',
|
||||
pages: outDir,
|
||||
assets: outDir,
|
||||
fallback: 'index.html',
|
||||
precompress: false,
|
||||
strict: true
|
||||
|
|
|
|||
|
|
@ -1,7 +0,0 @@
|
|||
#ifdef LLAMA_BUILD_UI
|
||||
// auto generated files (see README.md for details)
|
||||
#include "index.html.hpp"
|
||||
#include "bundle.js.hpp"
|
||||
#include "bundle.css.hpp"
|
||||
#include "loading.html.hpp"
|
||||
#endif
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// TODO @ngxson : refactor, wrap these in a function
|
||||
|
||||
#ifdef LLAMA_BUILD_UI
|
||||
extern unsigned char index_html[];
|
||||
extern unsigned int index_html_len;
|
||||
|
||||
extern unsigned char bundle_js[];
|
||||
extern unsigned int bundle_js_len;
|
||||
|
||||
extern unsigned char bundle_css[];
|
||||
extern unsigned int bundle_css_len;
|
||||
|
||||
extern unsigned char loading_html[];
|
||||
extern unsigned int loading_html_len;
|
||||
#endif
|
||||
Loading…
Add table
Add a link
Reference in a new issue