# koboldcpp/.github/workflows/build-self-hosted.yml

# Name under which this workflow is listed in the GitHub Actions UI.
name: CI (self-hosted)

# Triggers: manual dispatch, pushes to master, and pull requests.
# Push and pull_request runs are limited to changes that touch this workflow
# file, CMake build files, or source/shader files with the listed extensions.
# Both path lists are intentionally identical; keep them in sync.
on:
  workflow_dispatch:  # allows manual triggering
  push:
    branches:
      - master
    paths:
      # this workflow file itself (previously pointed at build.yml)
      - '.github/workflows/build-self-hosted.yml'
      - '**/CMakeLists.txt'
      # any *.cmake module ('**/.cmake' only matched files literally named ".cmake")
      - '**/*.cmake'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.cuh'
      - '**/*.swift'
      - '**/*.m'
      - '**/*.metal'
      - '**/*.comp'
      - '**/*.glsl'
      - '**/*.wgsl'

  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - '.github/workflows/build-self-hosted.yml'
      - '**/CMakeLists.txt'
      # any *.cmake module ('**/.cmake' only matched files literally named ".cmake")
      - '**/*.cmake'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.cuh'
      - '**/*.swift'
      - '**/*.m'
      - '**/*.metal'
      - '**/*.comp'
      - '**/*.glsl'
      - '**/*.wgsl'

# Workflow-level concurrency. When github.head_ref is set, the group is keyed
# on github.ref, so a newer run for the same ref cancels the one in progress.
# When it is empty, the group falls back to the unique github.run_id, so those
# runs never share a group.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}

# Environment variables set at workflow level, i.e. visible to every job below.
# Values are written as strings; the runner exports them as strings either way.
env:
  GGML_NLOOP: "3"
  GGML_N_THREADS: "1"
  LLAMA_LOG_COLORS: "1"
  LLAMA_LOG_PREFIX: "1"
  LLAMA_LOG_TIMESTAMPS: "1"

jobs:
  # Resolves the tag name once, on the `ubuntu-slim` runner, and publishes it as
  # the `tag_name` job output so the jobs that `needs` this one can read it.
  determine-tag:
    name: Determine tag name
    runs-on: ubuntu-slim
    outputs:
      # forwarded from the `name` output of the step with id `tag`
      tag_name: ${{ steps.tag.outputs.name }}
    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          # 0 = fetch the complete history rather than a shallow clone
          fetch-depth: 0

      - id: tag
        name: Determine tag name
        uses: ./.github/actions/get-tag-name
        env:
          # head_ref when it is set, otherwise the short ref name
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

  # Runs ci/run.sh with GG_BUILD_CUDA=1 on a self-hosted Linux runner labelled
  # NVIDIA. Depends on determine-tag so its tag_name output can be passed in.
  ggml-ci-nvidia-cuda:
    runs-on:
      - self-hosted
      - Linux
      - NVIDIA
    needs: determine-tag

    steps:
      - id: checkout
        name: Clone
        uses: actions/checkout@v6

      # nvidia-smi is invoked first, before the CI script starts
      - id: ggml-ci
        name: Test
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          nvidia-smi
          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  # Runs ci/run.sh with GG_BUILD_VULKAN=1 and GGML_VK_DISABLE_COOPMAT2=1 on a
  # self-hosted Linux runner labelled NVIDIA.
  ggml-ci-nvidia-vulkan-cm:
    runs-on:
      - self-hosted
      - Linux
      - NVIDIA
    needs: determine-tag

    steps:
      - id: checkout
        name: Clone
        uses: actions/checkout@v6

      # `vulkaninfo --summary` is invoked first, before the CI script starts
      - id: ggml-ci
        name: Test
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  # Runs ci/run.sh with GG_BUILD_VULKAN=1 on a self-hosted Linux runner that
  # carries both the NVIDIA and the COOPMAT2 labels.
  ggml-ci-nvidia-vulkan-cm2:
    runs-on:
      - self-hosted
      - Linux
      - NVIDIA
      - COOPMAT2
    needs: determine-tag

    steps:
      - id: checkout
        name: Clone
        uses: actions/checkout@v6

      # `vulkaninfo --summary` is invoked first, before the CI script starts
      - id: ggml-ci
        name: Test
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  # Runs ci/run.sh with GG_BUILD_WEBGPU=1 on a self-hosted Linux runner labelled
  # NVIDIA, against a Dawn release archive unpacked into $GITHUB_WORKSPACE/dawn.
  ggml-ci-nvidia-webgpu:
    # Wait for the tag so HF_UI_VERSION can be set, as in every other test job
    # of this workflow.
    needs: determine-tag
    runs-on: [self-hosted, Linux, NVIDIA]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6

      - name: Dawn Dependency
        id: dawn-depends
        run: |
          DAWN_VERSION="v20260317.182325"
          DAWN_OWNER="google"
          DAWN_REPO="dawn"
          DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
          # Build the URL once from the variables above so the log line and the download cannot drift apart
          DAWN_URL="https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
          echo "Fetching release asset from ${DAWN_URL}"
          # --fail: stop on an HTTP error instead of saving the error page as artifact.tar.gz
          curl --fail -L -o artifact.tar.gz "${DAWN_URL}"
          mkdir dawn
          # --strip-components=1 drops the archive's top-level directory
          tar -xvf artifact.tar.gz -C dawn --strip-components=1

      - name: Test
        id: ggml-ci
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          GG_BUILD_WEBGPU=1 \
          GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
          GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
            bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  # TODO: provision AMX-compatible machine
  #ggml-ci-cpu-amx:
  #  runs-on: [self-hosted, Linux, CPU, AMX]

  #  steps:
  #    - name: Clone
  #      id: checkout
  #      uses: actions/checkout@v6

  #    - name: Test
  #      id: ggml-ci
  #      run: |
  #        bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  # TODO: provision AMD GPU machine
  # ggml-ci-amd-vulkan:
  #   runs-on: [self-hosted, Linux, AMD]

  #   steps:
  #     - name: Clone
  #       id: checkout
  #       uses: actions/checkout@v6

  #     - name: Test
  #       id: ggml-ci
  #       run: |
  #         vulkaninfo --summary
  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  # TODO: provision AMD GPU machine
  # ggml-ci-amd-rocm:
  #   runs-on: [self-hosted, Linux, AMD]

  #   steps:
  #     - name: Clone
  #       id: checkout
  #       uses: actions/checkout@v6

  #     - name: Test
  #       id: ggml-ci
  #       run: |
  #         amd-smi static
  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp

  # Runs ci/run.sh with GG_BUILD_METAL=1 on a self-hosted macOS ARM64 runner.
  # Depends on determine-tag so its tag_name output can be passed in.
  ggml-ci-mac-metal:
    runs-on:
      - self-hosted
      - macOS
      - ARM64
    needs: determine-tag

    steps:
      - id: checkout
        name: Clone
        uses: actions/checkout@v6

      - id: ggml-ci
        name: Test
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Runs ci/run.sh with GG_BUILD_WEBGPU=1 on a self-hosted macOS ARM64 runner,
  # against a Dawn release archive unpacked into $GITHUB_WORKSPACE/dawn.
  ggml-ci-mac-webgpu:
    needs: determine-tag
    runs-on: [self-hosted, macOS, ARM64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6

      - name: Dawn Dependency
        id: dawn-depends
        run: |
          DAWN_VERSION="v20260317.182325"
          DAWN_OWNER="google"
          DAWN_REPO="dawn"
          DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-macos-latest-Release"
          # Build the URL once from the variables above so the log line and the download cannot drift apart
          DAWN_URL="https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
          echo "Fetching release asset from ${DAWN_URL}"
          # --fail: stop on an HTTP error instead of saving the error page as artifact.tar.gz
          curl --fail -L -o artifact.tar.gz "${DAWN_URL}"
          mkdir dawn
          # --strip-components=1 drops the archive's top-level directory
          tar -xvf artifact.tar.gz -C dawn --strip-components=1

      - name: Test
        id: ggml-ci
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
            bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Runs ci/run.sh with GG_BUILD_VULKAN=1 on a self-hosted macOS ARM64 runner.
  # Depends on determine-tag so its tag_name output can be passed in.
  ggml-ci-mac-vulkan:
    runs-on:
      - self-hosted
      - macOS
      - ARM64
    needs: determine-tag

    steps:
      - id: checkout
        name: Clone
        uses: actions/checkout@v6

      # `vulkaninfo --summary` is invoked first, before the CI script starts
      - id: ggml-ci
        name: Test
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Runs ci/run.sh with GG_BUILD_VULKAN=1 on a self-hosted Linux runner labelled
  # Intel. Depends on determine-tag so its tag_name output can be passed in.
  ggml-ci-linux-intel-vulkan:
    runs-on:
      - self-hosted
      - Linux
      - Intel
    needs: determine-tag

    steps:
      - id: checkout
        name: Clone
        uses: actions/checkout@v6
        with:
          # do not keep the checkout token in the local git configuration
          persist-credentials: false

      # `vulkaninfo --summary` is invoked first, before the CI script starts
      - id: ggml-ci
        name: Test
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Runs ci/run.sh with the Vulkan, Ninja and low-perf switches on a self-hosted
  # Windows X64 runner labelled Intel, using the bash binary under C:\msys64.
  ggml-ci-win-intel-vulkan:
    runs-on:
      - self-hosted
      - Windows
      - X64
      - Intel
    needs: determine-tag

    steps:
      - id: checkout
        name: Clone
        uses: actions/checkout@v6

      - id: ggml-ci
        name: Test
        # custom shell: the step script ({0}) is executed by the MSYS2 bash binary
        shell: C:\msys64\usr\bin\bash.exe --noprofile --norc -eo pipefail "{0}"
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
          MSYSTEM: UCRT64
          CHERE_INVOKING: 1
          # NOTE(review): the `env` context only holds variables defined in the
          # workflow/job/step, so `${{ env.PATH }}` presumably expands to an empty
          # string here rather than the runner's PATH - confirm before relying on it.
          PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
        run: |
          vulkaninfo --summary
          # Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
          # a valid python environment for testing
          LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp

  # Runs ci/run.sh with GG_BUILD_OPENVINO=1, GGML_OPENVINO_DEVICE=GPU and
  # GG_BUILD_LOW_PERF=1 on a self-hosted Linux runner labelled Intel and OpenVINO.
  ggml-ci-intel-openvino-gpu-low-perf:
    runs-on:
      - self-hosted
      - Linux
      - Intel
      - OpenVINO
    needs: determine-tag

    # Job-level group keyed on head_ref (or ref when head_ref is empty); an
    # in-progress run of this job is not cancelled by a newer one.
    concurrency:
      cancel-in-progress: false
      group: openvino-gpu-${{ github.head_ref || github.ref }}

    env:
      # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
      OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
      OPENVINO_VERSION_MAJOR: "2026.0"

    steps:
      - id: checkout
        name: Clone
        uses: actions/checkout@v6

      # repository-local action; receives the target path and both version strings
      - name: Setup OpenVINO Toolkit
        uses: ./.github/actions/linux-setup-openvino
        with:
          version_full: ${{ env.OPENVINO_VERSION_FULL }}
          version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
          path: ./openvino_toolkit

      # "Y" is piped into the installer's stdin; sudo -E keeps the current environment
      - name: Install OpenVINO dependencies
        run: |
          cd ./openvino_toolkit
          chmod +x ./install_dependencies/install_openvino_dependencies.sh
          echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh

      # setupvars.sh is sourced in the same shell before the CI script starts
      - id: ggml-ci
        name: Test
        env:
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
        run: |
          source ./openvino_toolkit/setupvars.sh
          GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt