mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-19 08:00:25 +00:00
* ci: add workflow to publish webui to Hugging Face bucket * ci: add webui release job to release workflow * ci: test webui release job * chore: Return to default minification strategy for build output files * ci: extract webui build into separate workflow and job * chore: Ignore webui static output + clean up references * chore: Delete legacy webui static output * chore: Ignore webui build static output * fix: Workflow * fix: Versioning naming * chore: Update package name * test: Test CI fix * refactor: Naming * server: implement webui build strategy with HF Bucket support * chore: Remove test workflow * chore: Use WebUI build workflow call in other workflows * server: HF Buckets fallback for WebUI build * refactor: App name variable * refactor: Naming * fix: Retrieve loading.html * fix: workflow syntax * fix: Rewrite malformed release.yml * fix: Req param * test: Re-add missing Playwright installation for CI tests * refactor: Logic & security improvements * refactor: Retrieve publishing jobs and DRY the workflows * fix: Test workflow syntax * fix: Upstream Release Tag for test workflow * chore: Remove test workflow * ci: Run WebUI jobs on `ubuntu-24.04-arm` * refactor: Post-CR cleanup Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com> * refactor: CI cleanup * refactor: Cleanup * test: Test workflow * refactor: use LLAMA_BUILD_NUMBER instead of LLAMA_BUILD_TAG for HF Bucket webui downloads * server: add fallback mechanism for HF Bucket webui downloads from latest directory * fix: Incorrect argument order in file(SHA256) calls for checksum verification * refactor: Use cmake script for handling the HF Bucket download on build time * feat: support local npm build for WebUI assets * refactor: add `HF_ENABLED` flag to control WebUI build/download provisioning * refactor: Cleanup * chore: Remove test workflow * fix: remove s390x from release workflow * fix: add webui-build dependency to 
ubuntu-22-rocm and windows-hip * Revert "fix: remove s390x from release workflow" This reverts commit debcfffa9bc1e3112eae41f2d29741b682e4eb19. * fix: Release workflow file * fix: Proper release tag used for HF Bucket upload * fix: Remove duplicate steps in release workflow --------- Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
136 lines
3.9 KiB
YAML
136 lines
3.9 KiB
YAML
# Builds `llama-server` and runs its pytest suite on a self-hosted runner.
name: Server (self-hosted)

on:
  # Manual trigger.
  workflow_dispatch:
    inputs:
      # Optional commit to check out instead of the triggering ref.
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      # NOTE(review): no job visible in this workflow reads `slow_tests`;
      # confirm whether the input is still needed.
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean

  # Pushes to master, limited to this workflow file, build files,
  # native sources and the server tool directory.
  push:
    branches:
      - master
    paths:
      - '.github/workflows/server-self-hosted.yml'
      - '**/CMakeLists.txt'
      - '**/Makefile'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.swift'
      - '**/*.m'
      - 'tools/server/**.*'
|
|
|
|
# Workflow-level environment variables (logging-related, judging by name).
# Quoted because environment variables are always delivered as strings.
env:
  LLAMA_LOG_COLORS: "1"
  LLAMA_LOG_PREFIX: "1"
  LLAMA_LOG_TIMESTAMPS: "1"
  LLAMA_LOG_VERBOSITY: "10"
|
|
|
|
# Runs sharing the same group key cancel each other.
# NOTE(review): this workflow declares no pull_request trigger, so
# `github.head_ref` is presumably empty and the key falls back to the
# per-run `github.run_id`; confirm that never cancelling is intended.
concurrency:
  group: '${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}'
  cancel-in-progress: true
|
|
|
|
jobs:
  # Calls the repository-local WebUI build workflow; server-metal waits on it.
  webui-build:
    name: Build WebUI
    uses: ./.github/workflows/webui-build.yml

  # Builds llama-server and runs the server tests once per matrix entry.
  server-metal:
    needs: webui-build
    runs-on: [self-hosted, llama-server, macOS, ARM64]

    name: server-metal (${{ matrix.wf_name }})
    strategy:
      matrix:
        build_type: [Release]
        wf_name: ["GPUx1"]
        # `extra_args` holds space-separated NAME=VALUE pairs that the Tests
        # step exports before running pytest. The base `GPUx1` combination
        # above defines none.
        include:
          - build_type: Release
            extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
            wf_name: "GPUx1, backend-sampling"
          - build_type: Release
            extra_args: "GGML_METAL_DEVICES=2"
            wf_name: "GPUx2"
          - build_type: Release
            extra_args: "GGML_METAL_DEVICES=2 LLAMA_ARG_BACKEND_SAMPLING=1"
            wf_name: "GPUx2, backend-sampling"
      fail-fast: false

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          # First non-empty value wins: manual `sha` input, then PR head,
          # then the triggering commit / branch.
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      # Places the `webui-build` artifact into tools/server/public/
      # (presumably produced by the webui-build job above).
      - name: Download WebUI build artifact
        uses: actions/download-artifact@v7
        with:
          name: webui-build
          path: tools/server/public/

      - name: Build
        id: cmake_build
        run: |
          cmake -B build -DGGML_SCHED_NO_REALLOC=ON
          cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server

      - name: Tests
        id: server_integration_tests
        # NOTE(review): no matrix entry in this file sets `disabled_on_pr`,
        # so this condition is expected to always hold here.
        if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
        env:
          # Passed through the environment instead of being pasted into the
          # script text; empty when the matrix entry defines no extra_args.
          EXTRA_ARGS: ${{ matrix.extra_args }}
        run: |
          cd tools/server/tests
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt
          # A bare `export` would print the whole environment into the job
          # log, so only export when there is something to set.
          if [ -n "${EXTRA_ARGS}" ]; then
            # Intentionally unquoted: split into individual NAME=VALUE words.
            export ${EXTRA_ARGS}
          fi
          pytest -v -x -m "not slow"
|
|
|
|
# TODO: provision CUDA runner
# NOTE(review): template only. Stripping the leading "# " from the lines below
# yields a job nested under `jobs:`. Before enabling it:
#   - `sysctl -n hw.logicalcpu` looks copied from server-metal (macOS); a
#     Linux runner likely needs `nproc` instead - confirm.
#   - unlike server-metal, this job has no `needs: webui-build` and no WebUI
#     artifact download step - confirm whether it requires them.
#   server-cuda:
#     runs-on: [self-hosted, llama-server, Linux, NVIDIA]
#
#     name: server-cuda (${{ matrix.wf_name }})
#     strategy:
#       matrix:
#         build_type: [Release]
#         wf_name: ["GPUx1"]
#         include:
#           - build_type: Release
#             extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1"
#             wf_name: "GPUx1, backend-sampling"
#       fail-fast: false
#
#     steps:
#       - name: Clone
#         id: checkout
#         uses: actions/checkout@v6
#         with:
#           fetch-depth: 0
#           ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
#
#       - name: Build
#         id: cmake_build
#         run: |
#           cmake -B build -DGGML_SCHED_NO_REALLOC=ON
#           cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server
#
#       - name: Tests
#         id: server_integration_tests
#         if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }}
#         run: |
#           cd tools/server/tests
#           python3 -m venv venv
#           source venv/bin/activate
#           pip install -r requirements.txt
#           export ${{ matrix.extra_args }}
#           pytest -v -x -m "not slow"
|