mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-19 08:00:25 +00:00
* webui: Move static build output from `tools/server/public` to `build/ui` directory * refactor: Move to `tools/ui` * refactor: rename CMake variables and preprocessor defines - Rename LLAMA_BUILD_WEBUI -> LLAMA_BUILD_UI (old kept as deprecated) - Rename LLAMA_USE_PREBUILT_WEBUI -> LLAMA_USE_PREBUILT_UI (old kept as deprecated) - Backward compat: old vars auto-forward to new ones with DEPRECATION warning - Rename internal vars: WEBUI_SOURCE -> UI_SOURCE, WEBUI_SOURCE_DIR -> UI_SOURCE_DIR, etc. - Rename HF bucket: LLAMA_WEBUI_HF_BUCKET -> LLAMA_UI_HF_BUCKET - Emit both LLAMA_BUILD_WEBUI and LLAMA_BUILD_UI preprocessor defines - Emit both LLAMA_WEBUI_DEFAULT_ENABLED and LLAMA_UI_DEFAULT_ENABLED * refactor: rename CLI flags (--webui -> --ui) with backward compat - Add --ui/--no-ui (old --webui/--no-webui kept as deprecated aliases) - Add --ui-config (old --webui-config kept as deprecated alias) - Add --ui-config-file (old --webui-config-file kept as deprecated alias) - Add --ui-mcp-proxy/--no-ui-mcp-proxy (old --webui-mcp-proxy kept as deprecated) - Add new env vars: LLAMA_ARG_UI, LLAMA_ARG_UI_CONFIG, LLAMA_ARG_UI_CONFIG_FILE, LLAMA_ARG_UI_MCP_PROXY - C++ struct fields: params.ui, params.ui_config_json, params.ui_mcp_proxy added alongside old fields - Backward compat: old fields synced to new ones in g_params_to_internals * refactor: update C++ server internals with backward compat - Rename json_webui_settings -> json_ui_settings (both kept in server_context_meta) - Rename params.webui usage -> params.ui (both synced, old still works) - JSON API emits both "ui"/"ui_settings" and "webui"/"webui_settings" keys - Server routes use params.ui_mcp_proxy || params.webui_mcp_proxy - Preprocessor guards use #if defined(LLAMA_BUILD_UI) || defined(LLAMA_BUILD_WEBUI) * refactor: rename CI/CD workflows, artifacts, and build script - Rename webui-build.yml -> ui-build.yml; artifact webui-build -> ui-build - Rename webui-publish.yml -> ui-publish.yml; var 
HF_BUCKET_WEBUI_STATIC_OUTPUT -> HF_BUCKET_UI_STATIC_OUTPUT - Rename server-webui.yml -> server-ui.yml; job webui-build/checks -> ui-build/checks - Update server.yml: job/artifact refs webui-build -> ui-build - Update release.yml: all webui-build/publish refs -> ui-build/publish; HF_TOKEN_WEBUI_STATIC_OUTPUT -> HF_TOKEN_UI_STATIC_OUTPUT - Update server-self-hosted.yml: webui-build -> ui-build - Update build-self-hosted.yml: HF_WEBUI_VERSION -> HF_UI_VERSION - Rename webui-download.cmake -> ui-download.cmake (internal refs updated) - Update labeler.yml: server/webui -> server/ui path label * docs: update CODEOWNERS and server README docs - Update CODEOWNERS: team ggml-org/llama-webui -> ggml-org/llama-ui, path /tools/server/webui/ -> /tools/ui/ - Update server README.md: CLI tables show --ui flags with deprecated --webui aliases - Update server README-dev.md: "WebUI" -> "UI", paths updated to tools/ui/ * fix: Small fixes for UI build * fix: CMake.txt syntax * chore: Formatting * fix: `.editorconfig` for llama-ui * chore: Formatting * refactor: Use `APP_NAME` in Error route * refactor: Cleanup * refactor: Single migration service * make llama-ui a linkable target * fix: UI Build output * fix: Missing change * fix: separate llama-ui npm build output into build/tools/ui/dist subfolder + use cmake npm build instead of downloading ui-build.yml artifacts in CI * refactor: UI workflows cleanup --------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
132 lines
3.8 KiB
YAML
132 lines
3.8 KiB
YAML
# Server workflow for the self-hosted runners: triggered manually or by pushes
# to master that touch build files or native sources.
name: Server (self-hosted)

on:
  # Allows manual triggering from the Actions tab or the API.
  workflow_dispatch:
    inputs:
      # Optional commit to check out instead of the triggering ref.
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      # NOTE(review): no step visible in this workflow reads `slow_tests`;
      # the Tests step always runs pytest with -m "not slow". Confirm intent.
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean

  # Runs on pushes to master, but only when one of the listed paths changed.
  push:
    branches:
      - master
    paths:
      - '.github/workflows/server-self-hosted.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'tools/server/**.*'
|
|
|
|
# Logging-related environment variables set for the workflow.
# Values are quoted so they are unambiguously strings, which is how
# environment variables are delivered to the steps anyway.
env:
  LLAMA_LOG_COLORS: "1"
  LLAMA_LOG_PREFIX: "1"
  LLAMA_LOG_TIMESTAMPS: "1"
  LLAMA_LOG_VERBOSITY: "10"
|
|
|
|
# Runs are grouped by workflow name + ref + head ref; the last component
# falls back to the run id when `github.head_ref` is empty. A newer run in
# the same group cancels the one still in progress.
concurrency:
  group: >-
    ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
|
|
|
|
jobs:
  # Builds the `llama-server` target with CMake on the macOS/ARM64 self-hosted
  # runner and runs the non-slow server integration tests, once per matrix
  # entry.
  server-metal:
    runs-on: [self-hosted, llama-server, macOS, ARM64]

    name: server-metal (${{ matrix.wf_name }})
    strategy:
      matrix:
        build_type: [Release]
        wf_name: ["GPUx1"]
        # Additional configurations. `extra_args` is a space-separated list of
        # NAME=value pairs exported into the test environment; the base entry
        # above has none.
        include:
          - build_type: Release
            extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
            wf_name: "GPUx1, backend-sampling"
          - build_type: Release
            extra_args: "GGML_METAL_DEVICES=2"
            wf_name: "GPUx2"
          - build_type: Release
            extra_args: "GGML_METAL_DEVICES=2 LLAMA_ARG_BACKEND_SAMPLING=1"
            wf_name: "GPUx2, backend-sampling"
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          # First non-empty value wins: manual `sha` input, PR head, then the
          # triggering commit/ref.
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "24"
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

      - name: Build
        id: cmake_build
        run: |
          cmake -B build -DGGML_SCHED_NO_REALLOC=ON
          cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server

      - name: Tests
        id: server_integration_tests
        if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
        env:
          # Passed through the environment instead of being spliced into the
          # script text; empty for the base matrix entry.
          EXTRA_ARGS: ${{ matrix.extra_args }}
        run: |
          cd tools/server/tests
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt
          # A bare `export` prints every exported variable into the log, so
          # only export when there is something to set. The expansion is left
          # unquoted on purpose: each NAME=value pair must be its own word.
          if [ -n "${EXTRA_ARGS}" ]; then
            export ${EXTRA_ARGS}
          fi
          pytest -v -x -m "not slow"
|
|
|
|
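# TODO: provision CUDA runner, then enable the server-cuda job below (its build step still uses the macOS-only `sysctl -n hw.logicalcpu`; switch to `nproc` for the Linux runner)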
|
|
# server-cuda:
|
|
# runs-on: [self-hosted, llama-server, Linux, NVIDIA]
|
|
#
|
|
# name: server-cuda (${{ matrix.wf_name }})
|
|
# strategy:
|
|
# matrix:
|
|
# build_type: [Release]
|
|
# wf_name: ["GPUx1"]
|
|
# include:
|
|
# - build_type: Release
|
|
# extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
|
|
# wf_name: "GPUx1, backend-sampling"
|
|
# fail-fast: false
|
|
#
|
|
# steps:
|
|
# - name: Clone
|
|
# id: checkout
|
|
# uses: actions/checkout@v6
|
|
# with:
|
|
# fetch-depth: 0
|
|
# ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
|
|
#
|
|
# - name: Build
|
|
# id: cmake_build
|
|
# run: |
|
|
# cmake -B build -DGGML_SCHED_NO_REALLOC=ON
|
|
# cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server
|
|
#
|
|
# - name: Tests
|
|
# id: server_integration_tests
|
|
# if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
|
|
# run: |
|
|
# cd tools/server/tests
|
|
# python3 -m venv venv
|
|
# source venv/bin/activate
|
|
# pip install -r requirements.txt
|
|
# export ${{ matrix.extra_args }}
|
|
# pytest -v -x -m "not slow"
|