ci : refactor (#23789)

* ci : separate CUDA windows workflow + fix names
* ci : rename workflow
* ci : prefix cache names with workflow name
* ci : rename build.yml -> build-cpu.yml
* ci : cache keys
* ci : fix windows cuda/hip concurrency of release workflow
* ci : fix apple cache names
* ci : add TODOs
* cont : keep just the last cache
* ci : update release concurrency to queue
* ci : move the release trigger to ubuntu-slim
* ci : hip add TODO
* cont : improve words

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2026-06-01 06:00:36 +00:00 · 2026-05-28 09:44:25 +03:00 · 2026-05-28 09:44:25 +03:00 · 491c4d7d2e
commit 491c4d7d2e
parent 939a7dd648
18 changed files with 342 additions and 303 deletions
--- a/.github/workflows/build-android.yml
+++ b/.github/workflows/build-android.yml
@ -32,7 +32,7 @@ env:
  LLAMA_ARG_LOG_TIMESTAMPS: 1

 jobs:
-  android:
+  default:
    runs-on: ubuntu-latest

    steps:
@ -58,7 +58,7 @@ jobs:
          cd examples/llama.android
          ./gradlew build --no-daemon

-  android-ndk:
+  ndk:
    runs-on: ubuntu-latest
    container:
      image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3'
@ -92,7 +92,7 @@ jobs:
          name: llama-cpp-android-arm64-cpu
          path: pkg-adb/llama.cpp

-  android-arm64:
+  arm64:
    runs-on: ubuntu-latest

    env:
@ -103,12 +103,18 @@ jobs:
        id: checkout
        uses: actions/checkout@v6

-      - name: ccache
-        uses: ggml-org/ccache-action@v1.2.21
-        with:
-          key: android-arm64
-          evict-old-files: 1d
-          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
+      # note : disabled to spare some cache space (https://github.com/ggml-org/llama.cpp/pull/23789)
+      #        for some reason, the ccache does not improve the build time in this case
+      # example:
+      #   cache off: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78160400831
+      #   cache on:  https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78224189394
+      #
+      #- name: ccache
+      #  uses: ggml-org/ccache-action@v1.2.21
+      #  with:
+      #    key: android-ubuntu-arm64
+      #    evict-old-files: 1d
+      #    save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

      - name: Set up JDK
        uses: actions/setup-java@v5
--- a/.github/workflows/build-apple.yml
+++ b/.github/workflows/build-apple.yml
@ -48,7 +48,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: macos-latest-arm64
+          key: apple-arm64
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -84,7 +84,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: macos-latest-x64
+          key: apple-x64
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -117,10 +117,11 @@ jobs:
        id: checkout
        uses: actions/checkout@v6

+      # TODO: this likely does not do anything - if yes, remove it
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: macos-latest-ios
+          key: apple-ios
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -197,10 +198,11 @@ jobs:
        id: checkout
        uses: actions/checkout@v6

+      # TODO: this likely does not do anything - if yes, remove it
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: macos-latest-tvos
+          key: apple-tvos
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -230,6 +232,14 @@ jobs:
        id: checkout
        uses: actions/checkout@v6

+      # TODO: this likely does not do anything - if yes, remove it
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.21
+        with:
+          key: apple-visionos
+          evict-old-files: 1d
+          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
+
      - name: Build
        id: cmake_build
        run: |
@ -261,10 +271,11 @@ jobs:
        id: checkout
        uses: actions/checkout@v6

+      # TODO: this likely does not do anything - if yes, remove it
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: macos-latest-swift
+          key: apple-swift
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

--- a/.github/workflows/build-cpu.yml
+++ b/.github/workflows/build-cpu.yml
@ -1,4 +1,4 @@
-name: CI
+name: CI (cpu)

 on:
  workflow_dispatch: # allows manual triggering
@ -6,7 +6,7 @@ on:
    branches:
      - master
    paths: [
-      '.github/workflows/build.yml',
+      '.github/workflows/build-cpu.yml',
      '.github/workflows/build-cmake-pkg.yml',
      '**/CMakeLists.txt',
      '**/.cmake',
@ -27,7 +27,7 @@ on:
  pull_request:
    types: [opened, synchronize, reopened]
    paths: [
-      '.github/workflows/build.yml',
+      '.github/workflows/build-cpu.yml',
      '.github/workflows/build-cmake-pkg.yml',
      '**/CMakeLists.txt',
      '**/.cmake',
@ -60,7 +60,7 @@ jobs:
  build-cmake-pkg:
    uses: ./.github/workflows/build-cmake-pkg.yml

-  ubuntu-cpu:
+  ubuntu:
    strategy:
      matrix:
        include:
@ -79,7 +79,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-cpu-${{ matrix.build }}
+          key: cpu-${{ matrix.os }}
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -131,46 +131,7 @@ jobs:
          ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
          ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256

-  ubuntu-24-vulkan:
-    strategy:
-      matrix:
-        include:
-          - build: 'x64'
-            os: ubuntu-24.04
-          - build: 'arm64'
-            os: ubuntu-24.04-arm
-
-    runs-on: ${{ matrix.os }}
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Dependencies
-        id: depends
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev spirv-headers libssl-dev ninja-build
-          echo "CC=gcc-14" >> "$GITHUB_ENV"
-          echo "CXX=g++-14" >> "$GITHUB_ENV"
-
-      - name: Configure
-        id: cmake_configure
-        run: |
-          cmake -B build \
-            -G "Ninja" \
-            -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-            -DGGML_BACKEND_DL=ON \
-            -DGGML_CPU_ALL_VARIANTS=ON \
-            -DGGML_VULKAN=ON
-
-      - name: Build
-        id: cmake_build
-        run: |
-          time cmake --build build -j $(nproc)
-
-  windows-latest:
+  windows:
    runs-on: windows-2025

    env:
@ -202,7 +163,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: windows-latest-${{ matrix.build }}
+          key: cpu-windows-2025-${{ matrix.build }}
          variant: ccache
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
@ -268,88 +229,3 @@ jobs:
      #     cd build
      #     $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
      #     & $sde -future -- ctest -L main -C Release --verbose --timeout 900
-
-  ubuntu-latest-cuda:
-    runs-on: ubuntu-latest
-    container: nvidia/cuda:12.6.2-devel-ubuntu24.04
-
-    steps:
-        - name: Clone
-          id: checkout
-          uses: actions/checkout@v6
-
-        - name: Install dependencies
-          env:
-            DEBIAN_FRONTEND: noninteractive
-          run: |
-              apt update
-              apt install -y cmake build-essential ninja-build libgomp1 git libssl-dev
-
-        - name: ccache
-          uses: ggml-org/ccache-action@v1.2.21
-          with:
-            key: ubuntu-latest-cuda
-            evict-old-files: 1d
-            save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
-
-        - name: Build with CMake
-          # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
-          run: |
-            cmake -S . -B build -G Ninja \
-              -DLLAMA_FATAL_WARNINGS=ON \
-              -DCMAKE_BUILD_TYPE=Release \
-              -DCMAKE_CUDA_ARCHITECTURES=89-real \
-              -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
-              -DGGML_NATIVE=OFF \
-              -DGGML_CUDA=ON \
-              -DGGML_CUDA_CUB_3DOT2=ON
-            cmake --build build
-
-  windows-2022-cuda:
-    runs-on: windows-2022
-
-    strategy:
-      matrix:
-        cuda: ['12.4']
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Install ccache
-        uses: ggml-org/ccache-action@v1.2.21
-        with:
-          key: windows-cuda-${{ matrix.cuda }}
-          variant: ccache
-          evict-old-files: 1d
-          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
-
-      - name: Install Cuda Toolkit
-        uses: ./.github/actions/windows-setup-cuda
-        with:
-          cuda_version: ${{ matrix.cuda }}
-
-      - name: Install Ninja
-        id: install_ninja
-        run: |
-          choco install ninja
-
-      - name: Build
-        id: cmake_build
-        shell: cmd
-        # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
-        run: |
-          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
-          cmake -S . -B build -G "Ninja Multi-Config" ^
-            -DLLAMA_BUILD_SERVER=ON ^
-            -DLLAMA_BUILD_BORINGSSL=ON ^
-            -DGGML_NATIVE=OFF ^
-            -DGGML_BACKEND_DL=ON ^
-            -DGGML_CPU_ALL_VARIANTS=ON ^
-            -DGGML_CUDA=ON ^
-            -DGGML_RPC=ON ^
-            -DGGML_CUDA_CUB_3DOT2=ON
-          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
-          cmake --build build --config Release -j %NINJA_JOBS% -t ggml
-          cmake --build build --config Release
--- a/.github/workflows/build-cuda-ubuntu.yml
+++ b/.github/workflows/build-cuda-ubuntu.yml
@ -1,4 +1,4 @@
-name: CI (hip)
+name: CI (CUDA, ubuntu)

 on:
  workflow_dispatch: # allows manual triggering
@ -6,7 +6,7 @@ on:
    branches:
      - master
    paths: [
-      '.github/workflows/build-hip.yml',
+      '.github/workflows/build-cuda-ubuntu.yml',
      '**/CMakeLists.txt',
      '**/.cmake',
      '**/*.h',
@ -20,7 +20,7 @@ on:
  pull_request:
    types: [opened, synchronize, reopened]
    paths: [
-      '.github/workflows/build-hip.yml',
+      '.github/workflows/build-cuda-ubuntu.yml',
      'ggml/src/ggml-cuda/**'
    ]

@ -36,8 +36,43 @@ env:
  LLAMA_ARG_LOG_TIMESTAMPS: 1

 jobs:
+  cuda:
+    runs-on: ubuntu-24.04
+    container: nvidia/cuda:12.6.2-devel-ubuntu24.04

-  ubuntu-22-hip:
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Install dependencies
+        env:
+          DEBIAN_FRONTEND: noninteractive
+        run: |
+          apt update
+          apt install -y cmake build-essential ninja-build libgomp1 git libssl-dev
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.21
+        with:
+          key: cuda-ubuntu-24.04-cuda
+          evict-old-files: 1d
+          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
+
+      - name: Build with CMake
+        # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
+        run: |
+          cmake -S . -B build -G Ninja \
+            -DLLAMA_FATAL_WARNINGS=ON \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_CUDA_ARCHITECTURES=89-real \
+            -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
+            -DGGML_NATIVE=OFF \
+            -DGGML_CUDA=ON \
+            -DGGML_CUDA_CUB_3DOT2=ON
+          cmake --build build
+
+  hip:
    runs-on: ubuntu-22.04
    container: rocm/dev-ubuntu-22.04:6.1.2

@ -55,7 +90,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-22-hip
+          key: cuda-ubuntu-22.04-hip
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -69,75 +104,7 @@ jobs:
            -DGGML_HIP=ON
          cmake --build build --config Release -j $(nproc)

-  windows-latest-hip:
-    runs-on: windows-2022
-
-    env:
-      # Make sure this is in sync with build-cache.yml
-      HIPSDK_INSTALLER_VERSION: "26.Q1"
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v6
-
-      - name: Grab rocWMMA package
-        id: grab_rocwmma
-        run: |
-          curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70201-81~24.04_amd64.deb"
-          7z x rocwmma.deb
-          7z x data.tar
-
-      - name: Use ROCm Installation Cache
-        uses: actions/cache@v5
-        id: cache-rocm
-        with:
-          path: C:\Program Files\AMD\ROCm
-          key: cache-gha-rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
-
-      - name: Setup ROCm
-        if: steps.cache-rocm.outputs.cache-hit != 'true'
-        uses: ./.github/actions/windows-setup-rocm
-        with:
-          version: ${{ env.HIPSDK_INSTALLER_VERSION }}
-
-      - name: Verify ROCm
-        id: verify
-        run: |
-          # Find and test ROCm installation
-          $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
-          if (-not $clangPath) {
-            Write-Error "ROCm installation not found"
-            exit 1
-          }
-          & $clangPath.FullName --version
-
-      - name: Install ccache
-        uses: ggml-org/ccache-action@v1.2.21
-        with:
-          key: ${{ github.job }}
-          evict-old-files: 1d
-          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
-
-      - name: Build
-        id: cmake_build
-        run: |
-          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
-          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
-          cmake -G "Unix Makefiles" -B build -S . `
-            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
-            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
-            -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.2.1/include/" `
-            -DCMAKE_BUILD_TYPE=Release `
-            -DLLAMA_BUILD_BORINGSSL=ON `
-            -DROCM_DIR="${env:HIP_PATH}" `
-            -DGGML_HIP=ON `
-            -DGGML_HIP_ROCWMMA_FATTN=ON `
-            -DGPU_TARGETS="gfx1100"  `
-            -DGGML_RPC=ON
-          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
-
-  ubuntu-22-musa:
+  musa:
    runs-on: ubuntu-22.04
    container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64

@ -155,7 +122,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-22-musa
+          key: cuda-ubuntu-22.04-musa
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

--- a/.github/workflows/build-cuda-windows.yml
+++ b/.github/workflows/build-cuda-windows.yml
@ -0,0 +1,146 @@
+name: CI (CUDA, windows)
+
+# TODO: this workflow is only triggered manually because it is very heavy on the CI
+#       when we provision dedicated windows runners, we can enable it for pushes too
+# note: running this workflow manually will populate the ccache for the release builds
+#       this can be used before merging a PR to speed up the release workflow
+on:
+  workflow_dispatch: # allows manual triggering
+
+# note: uses the same 'release' concurrency group as release.yml, so this will run in queue with the release workflow
+concurrency:
+  group: release
+  queue: max
+
+env:
+  GGML_NLOOP: 3
+  GGML_N_THREADS: 1
+  LLAMA_ARG_LOG_COLORS: 1
+  LLAMA_ARG_LOG_PREFIX: 1
+  LLAMA_ARG_LOG_TIMESTAMPS: 1
+jobs:
+  cuda:  # Windows CUDA build; one matrix leg per CUDA toolkit version listed below
+    runs-on: windows-2022
+
+    strategy:
+      matrix:
+        cuda: ['12.4', '13.3']
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.21
+        with:
+          key: release-windows-2022-x64-cuda-${{ matrix.cuda }}  # 'release-' prefix: per the note at the top of this file, manual runs populate the ccache for the release builds
+          append-timestamp: false # note: use this only with non-concurrent jobs!
+
+      - name: Install Cuda Toolkit
+        uses: ./.github/actions/windows-setup-cuda
+        with:
+          cuda_version: ${{ matrix.cuda }}
+
+      - name: Install Ninja
+        id: install_ninja
+        run: |
+          choco install ninja
+
+      - name: Build
+        id: cmake_build
+        shell: cmd
+        # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
+        run: |
+          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
+          cmake -S . -B build -G "Ninja Multi-Config" ^
+            -DLLAMA_BUILD_SERVER=ON ^
+            -DLLAMA_BUILD_BORINGSSL=ON ^
+            -DGGML_NATIVE=OFF ^
+            -DGGML_BACKEND_DL=ON ^
+            -DGGML_CPU_ALL_VARIANTS=ON ^
+            -DGGML_CUDA=ON ^
+            -DGGML_RPC=ON ^
+            -DGGML_CUDA_CUB_3DOT2=ON
+          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
+          cmake --build build --config Release -j %NINJA_JOBS% -t ggml
+          cmake --build build --config Release
+
+  hip:  # Windows HIP (ROCm) build
+    runs-on: windows-2022
+
+    env:
+      # Make sure this is in sync with build-cache.yml
+      HIPSDK_INSTALLER_VERSION: "26.Q1"
+
+    strategy:
+      matrix:
+        include:
+          # sync with release.yml; NOTE(review): gpu_targets is not referenced by the Build step below, which hard-codes -DGPU_TARGETS="gfx1100" - only matrix.name is used (in the ccache key)
+          - name: "radeon"
+            gpu_targets: "gfx1150;gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: Grab rocWMMA package
+        id: grab_rocwmma
+        run: |
+          curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70201-81~24.04_amd64.deb"
+          7z x rocwmma.deb
+          7z x data.tar
+
+      - name: Use ROCm Installation Cache
+        uses: actions/cache@v5
+        id: cache-rocm
+        with:
+          path: C:\Program Files\AMD\ROCm
+          key: cache-gha-rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
+
+      - name: Setup ROCm
+        if: steps.cache-rocm.outputs.cache-hit != 'true'  # run the installer only when the cache-rocm step did not report a cache hit
+        uses: ./.github/actions/windows-setup-rocm
+        with:
+          version: ${{ env.HIPSDK_INSTALLER_VERSION }}
+
+      - name: Verify ROCm
+        id: verify
+        run: |
+          # Find and test ROCm installation
+          $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
+          if (-not $clangPath) {
+            Write-Error "ROCm installation not found"
+            exit 1
+          }
+          & $clangPath.FullName --version
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.21
+        with:
+          # TODO: this build does not match the build in release.yml, so we use a different cache key
+          #       ideally, the builds should match, similar to the CUDA build above so that we would be able
+          #       to populate the ccache for the release with manual runs of this workflow
+          #key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}
+          key: cuda-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}
+          append-timestamp: false # note: use this only with non-concurrent jobs!
+
+      - name: Build
+        id: cmake_build
+        run: |
+          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
+          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+          cmake -G "Unix Makefiles" -B build -S . `
+            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
+            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
+            -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.2.1/include/" `
+            -DCMAKE_BUILD_TYPE=Release `
+            -DLLAMA_BUILD_BORINGSSL=ON `
+            -DROCM_DIR="${env:HIP_PATH}" `
+            -DGGML_HIP=ON `
+            -DGGML_HIP_ROCWMMA_FATTN=ON `
+            -DGPU_TARGETS="gfx1100"  `
+            -DGGML_RPC=ON
+          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
--- a/.github/workflows/build-msys.yml
+++ b/.github/workflows/build-msys.yml
@ -37,7 +37,7 @@ jobs:
      #- name: ccache
      #  uses: ggml-org/ccache-action@v1.2.16
      #  with:
-      #    key: windows-msys2
+      #    key: msys-windows-2025-x64
      #    variant: ccache
      #    evict-old-files: 1d
      #    save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
--- a/.github/workflows/build-opencl.yml
+++ b/.github/workflows/build-opencl.yml
@ -35,8 +35,7 @@ env:
  LLAMA_ARG_LOG_TIMESTAMPS: 1

 jobs:
-
-  windows-latest-opencl-adreno:
+  windows-2025-opencl-adreno:
    runs-on: windows-2025

    steps:
@ -47,7 +46,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: windows-latest-llvm-arm64-opencl-adreno
+          key: opencl-windows-2025-x64
          variant: ccache
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
--- a/.github/workflows/build-openvino.yml
+++ b/.github/workflows/build-openvino.yml
@ -67,7 +67,7 @@ jobs:
        if: runner.environment == 'github-hosted'
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-24-openvino-${{ matrix.variant }}-no-preset-v1
+          key: openvino-ubuntu-24.04-${{ matrix.variant }}-no-preset-v1
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

--- a/.github/workflows/build-riscv.yml
+++ b/.github/workflows/build-riscv.yml
@ -69,7 +69,7 @@ jobs:
      #- name: ccache
      #  uses: ggml-org/ccache-action@afde29e5b5422e5da23cb1f639e8baecadeadfc3 # https://github.com/ggml-org/ccache-action/pull/1
      #  with:
-      #    key: ubuntu-cpu-riscv64-native
+      #    key: riscv-ubuntu-native
      #    evict-old-files: 1d
      #    save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -139,7 +139,7 @@ jobs:
      #- name: ccache
      #  uses: ggml-org/ccache-action@afde29e5b5422e5da23cb1f639e8baecadeadfc3 # https://github.com/ggml-org/ccache-action/pull/1
      #  with:
-      #    key: ubuntu-riscv64-native-sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}
+      #    key: riscv-ubuntu-native-sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}
      #    evict-old-files: 1d
      #    save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

--- a/.github/workflows/build-rpc.yml
+++ b/.github/workflows/build-rpc.yml
@ -34,7 +34,6 @@ env:
  LLAMA_ARG_LOG_TIMESTAMPS: 1

 jobs:
-
  ubuntu-latest-rpc:
    runs-on: ubuntu-latest

--- a/.github/workflows/build-sanitize.yml
+++ b/.github/workflows/build-sanitize.yml
@ -41,19 +41,6 @@ jobs:
        id: checkout
        uses: actions/checkout@v6

-      #- name: ccache
-      #  uses: ggml-org/ccache-action@v1.2.21
-      #  with:
-      #    key: ubuntu-latest-sanitizer-${{ matrix.sanitizer }}
-      #    evict-old-files: 1d
-      #    save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
-
-      #- name: Dependencies
-      #  id: depends
-      #  run: |
-      #    sudo apt-get update
-      #    sudo apt-get install build-essential libssl-dev
-
      # with UNDEFINED sanitizer, we have to build in Debug to avoid GCC 13 false-positive warnings
      - name: Build (undefined)
        id: cmake_build_undefined
--- a/.github/workflows/build-self-hosted.yml
+++ b/.github/workflows/build-self-hosted.yml
@ -396,14 +396,6 @@ jobs:
          sudo apt-get update
          sudo apt-get install -y cmake

-      # note: sparing some ccache since these jobs run on dedicated runners that are not part of the organitzation
-      #- name: ccache
-      #  uses: ggml-org/ccache-action@v1.2.21
-      #  with:
-      #    key: arm64-cpu-kleidiai-graviton4
-      #    evict-old-files: 1d
-      #    save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
-
      - name: Test
        id: ggml-ci
        run: |
--- a/.github/workflows/build-sycl.yml
+++ b/.github/workflows/build-sycl.yml
@ -88,7 +88,7 @@ jobs:
 #      - name: ccache
 #        uses: ggml-org/ccache-action@v1.2.21
 #        with:
-#          key: ubuntu-24-sycl-${{ matrix.build }}
+#          key: sycl-ubuntu-24-${{ matrix.build }}
 #          evict-old-files: 1d
 #          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
 #
@ -150,7 +150,7 @@ jobs:
 #      - name: ccache
 #        uses: ggml-org/ccache-action@v1.2.21
 #        with:
-#          key: windows-latest-sycl
+#          key: sycl-windows-latest
 #          variant: ccache
 #          evict-old-files: 1d
 #          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
--- a/.github/workflows/build-vulkan.yml
+++ b/.github/workflows/build-vulkan.yml
@ -36,7 +36,54 @@ env:
  LLAMA_ARG_LOG_TIMESTAMPS: 1

 jobs:
-  ubuntu-24-vulkan-llvmpipe:
+  ubuntu:  # Vulkan backend build on the Ubuntu 24.04 x64 and arm64 runners listed in the matrix
+    strategy:
+      matrix:
+        include:
+          - build: 'x64'
+            os: ubuntu-24.04
+          - build: 'arm64'
+            os: ubuntu-24.04-arm
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v6
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.21
+        with:
+          key: vulkan-${{ matrix.os }}  # keyed on the runner image, so each matrix leg gets its own cache
+          variant: ccache
+          evict-old-files: 1d
+          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}  # true only for pushes to master
+
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev spirv-headers libssl-dev ninja-build
+          echo "CC=gcc-14" >> "$GITHUB_ENV"
+          echo "CXX=g++-14" >> "$GITHUB_ENV"
+
+      - name: Configure
+        id: cmake_configure
+        run: |
+          cmake -B build \
+            -G "Ninja" \
+            -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+            -DGGML_BACKEND_DL=ON \
+            -DGGML_CPU_ALL_VARIANTS=ON \
+            -DGGML_VULKAN=ON
+
+      - name: Build
+        id: cmake_build
+        run: |
+          time cmake --build build -j $(nproc)
+
+  ubuntu-llvmpipe:
    runs-on: ubuntu-24.04

    steps:
@ -47,7 +94,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-24-vulkan-llvmpipe
+          key: vulkan-ubuntu-24.04-llvmpipe
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

--- a/.github/workflows/build-webgpu.yml
+++ b/.github/workflows/build-webgpu.yml
@ -35,7 +35,7 @@ env:
  LLAMA_ARG_LOG_TIMESTAMPS: 1

 jobs:
-  macos-latest-webgpu:
+  macos:
    runs-on: macos-latest

    steps:
@ -46,7 +46,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: macos-latest-webgpu
+          key: webgpu-macos-latest
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -76,7 +76,7 @@ jobs:
          cd build
          ctest -L main --verbose --timeout 900

-  ubuntu-24-webgpu:
+  ubuntu:
    runs-on: ubuntu-24.04

    steps:
@ -87,7 +87,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-24-webgpu
+          key: webgpu-ubuntu-24.04
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -129,8 +129,16 @@ jobs:
          # test-backend-ops is too slow on llvmpipe, skip it
          ctest -L main -E test-backend-ops --verbose --timeout 900

-  ubuntu-24-webgpu-wasm:
-    runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
+  ubuntu-wasm:
+    strategy:
+      matrix:
+        include:
+          - build: 'x64'
+            os: ubuntu-24.04
+          - build: 'arm64'
+            os: ubuntu-24.04-arm
+
+    runs-on: ${{ matrix.os }}

    steps:
      - name: Clone
@ -140,7 +148,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-24-webgpu-wasm
+          key: webgpu-${{ matrix.os }}-wasm
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

--- a/.github/workflows/hip-quality-check.yml
+++ b/.github/workflows/hip-quality-check.yml
@ -50,7 +50,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-22-hip-quality-check
+          key: hip-quality-check-ubuntu-22.04
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -27,18 +27,18 @@ on:
      '**/*.glsl'
    ]

-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
-  cancel-in-progress: true
-
 env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"

-jobs:
+# note: run this workflow one at a time for better cache reuse
+concurrency:
+  group: release
+  queue: max

+jobs:
  check_release:
-    runs-on: [self-hosted, fast]
+    runs-on: ubuntu-slim

    outputs:
      should_release: ${{ steps.check.outputs.should_release }}
@ -100,8 +100,8 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: macos-latest-${{ matrix.arch }}
-          evict-old-files: 1d
+          key: release-${{ matrix.os }}-${{ matrix.arch }}
+          append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Build
        id: cmake_build
@ -165,8 +165,8 @@ jobs:
        if: ${{ matrix.build != 's390x' }}
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-cpu-${{ matrix.build }}
-          evict-old-files: 1d
+          key: release-${{ matrix.os }}-cpu
+          append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Dependencies
        id: depends
@ -241,8 +241,8 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-vulkan-${{ matrix.build }}
-          evict-old-files: 1d
+          key: release-${{ matrix.os }}-vulkan
+          append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Dependencies
        id: depends
@ -311,11 +311,17 @@ jobs:
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

-      - name: ccache
-        uses: ggml-org/ccache-action@v1.2.21
-        with:
-          key: android-arm64
-          evict-old-files: 1d
+      # note : disabled to spare some cache space (https://github.com/ggml-org/llama.cpp/pull/23789)
+      #        for some reason, the ccache does not improve the build time in this case
+      # example:
+      #   cache off: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78160400831
+      #   cache on:  https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78224189394
+      #
+      #- name: ccache
+      #  uses: ggml-org/ccache-action@v1.2.21
+      #  with:
+      #    key: release-android-arm64
+      #    append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Set up JDK
        uses: actions/setup-java@v5
@ -402,8 +408,8 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-24-openvino-release-no-preset-v1
-          evict-old-files: 1d
+          key: release-ubuntu-24.04-openvino-release-no-preset-v1
+          append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Dependencies
        run: |
@ -485,9 +491,8 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: windows-latest-cpu-${{ matrix.arch }}
-          variant: ccache
-          evict-old-files: 1d
+          key: release-windows-2025-${{ matrix.arch }}-cpu
+          append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Install Ninja
        run: |
@ -556,9 +561,8 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: windows-latest-${{ matrix.backend }}-${{ matrix.arch }}
-          variant: ccache
-          evict-old-files: 1d
+          key: release-windows-2025-${{ matrix.arch }}-${{ matrix.backend }}
+          append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Install Vulkan SDK
        id: get_vulkan
@ -633,12 +637,11 @@ jobs:
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

-      - name: Install ccache
+      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: windows-cuda-${{ matrix.cuda }}
-          variant: ccache
-          evict-old-files: 1d
+          key: release-windows-2022-x64-cuda-${{ matrix.cuda }}
+          append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Install Cuda Toolkit
        uses: ./.github/actions/windows-setup-cuda
@ -744,9 +747,8 @@ jobs:
 #      - name: ccache
 #        uses: ggml-org/ccache-action@v1.2.21
 #        with:
-#          key: windows-latest-sycl
-#          variant: ccache
-#          evict-old-files: 1d
+#          key: release-windows-2022-x64-sycl
+#          append-timestamp: false # note: use this only with non-concurrent jobs!
 #
 #      - name: Build
 #        id: cmake_build
@ -866,9 +868,8 @@ jobs:
 #      - name: ccache
 #        uses: ggml-org/ccache-action@v1.2.21
 #        with:
-#          key: ubuntu-24-sycl-${{ matrix.build }}
-#          evict-old-files: 1d
-#          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
+#          key: release-ubuntu-24.04-sycl
+#          append-timestamp: false # note: use this only with non-concurrent jobs!
 #
 #      - name: Build
 #        id: cmake_build
@ -936,8 +937,8 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }}
-          evict-old-files: 1d
+          key: release-ubuntu-22.04-rocm-${{ matrix.ROCM_VERSION }}
+          append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Dependencies
        id: depends
@ -1058,8 +1059,8 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: windows-latest-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64
-          evict-old-files: 1d
+          key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}
+          append-timestamp: false # note: use this only with non-concurrent jobs!

      - name: Install ROCm
        if: steps.cache-rocm.outputs.cache-hit != 'true'
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@ -55,7 +55,7 @@ concurrency:

 jobs:
  ubuntu:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04

    name: ubuntu (${{ matrix.wf_name }})
    strategy:
@ -96,7 +96,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: server-ubuntu-default
+          key: server-ubuntu-24.04-x64
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

@ -144,7 +144,7 @@ jobs:
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
-          key: server-windows-default
+          key: server-windows-2025-x64
          evict-old-files: 1d
          save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}