diff --git a/.github/workflows/build-android.yml b/.github/workflows/build-android.yml index 713ccdc7f..a05248e12 100644 --- a/.github/workflows/build-android.yml +++ b/.github/workflows/build-android.yml @@ -32,7 +32,7 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - android: + default: runs-on: ubuntu-latest steps: @@ -58,7 +58,7 @@ jobs: cd examples/llama.android ./gradlew build --no-daemon - android-ndk: + ndk: runs-on: ubuntu-latest container: image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3' @@ -92,7 +92,7 @@ jobs: name: llama-cpp-android-arm64-cpu path: pkg-adb/llama.cpp - android-arm64: + arm64: runs-on: ubuntu-latest env: @@ -103,12 +103,18 @@ jobs: id: checkout uses: actions/checkout@v6 - - name: ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: android-arm64 - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + # note : disabled to spare some cache space (https://github.com/ggml-org/llama.cpp/pull/23789) + # for some reason, the ccache does not improve the build time in this case + # example: + # cache off: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78160400831 + # cache on: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78224189394 + # + #- name: ccache + # uses: ggml-org/ccache-action@v1.2.21 + # with: + # key: android-ubuntu-arm64 + # evict-old-files: 1d + # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - name: Set up JDK uses: actions/setup-java@v5 diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml index d2c99d0d5..54a3ed859 100644 --- a/.github/workflows/build-apple.yml +++ b/.github/workflows/build-apple.yml @@ -48,7 +48,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-arm64 + key: apple-arm64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -84,7 +84,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-x64 + key: apple-x64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -117,10 +117,11 @@ jobs: id: checkout uses: actions/checkout@v6 + # TODO: this likely does not do anything - if yes, remove it - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-ios + key: apple-ios evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -197,10 +198,11 @@ jobs: id: checkout uses: actions/checkout@v6 + # TODO: this likely does not do anything - if yes, remove it - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-tvos + key: apple-tvos evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -230,6 +232,14 @@ jobs: id: checkout uses: actions/checkout@v6 + # TODO: this likely does not do anything - if yes, remove it + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + key: apple-visionos + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + - name: Build id: cmake_build run: | @@ -261,10 +271,11 @@ jobs: id: checkout uses: actions/checkout@v6 + # TODO: this likely does not do anything - if yes, remove it - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-swift + key: apple-swift evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build.yml b/.github/workflows/build-cpu.yml similarity index 67% rename from .github/workflows/build.yml rename to .github/workflows/build-cpu.yml index 1deab2315..ee0f8b888 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build-cpu.yml @@ -1,4 +1,4 @@ -name: CI +name: CI (cpu) on: workflow_dispatch: # allows manual triggering @@ -6,7 +6,7 @@ on: branches: - master paths: [ - '.github/workflows/build.yml', + '.github/workflows/build-cpu.yml', '.github/workflows/build-cmake-pkg.yml', '**/CMakeLists.txt', '**/.cmake', @@ -27,7 +27,7 @@ on: pull_request: types: [opened, synchronize, reopened] paths: [ - '.github/workflows/build.yml', + '.github/workflows/build-cpu.yml', '.github/workflows/build-cmake-pkg.yml', '**/CMakeLists.txt', '**/.cmake', @@ -60,7 +60,7 @@ jobs: build-cmake-pkg: uses: ./.github/workflows/build-cmake-pkg.yml - ubuntu-cpu: + ubuntu: strategy: matrix: include: @@ -79,7 +79,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-cpu-${{ matrix.build }} + key: cpu-${{ matrix.os }} evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -131,46 +131,7 @@ jobs: ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - ubuntu-24-vulkan: - strategy: - matrix: - include: - - build: 'x64' - os: ubuntu-24.04 - - build: 'arm64' - os: ubuntu-24.04-arm - - runs-on: ${{ matrix.os }} - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev spirv-headers libssl-dev ninja-build - echo "CC=gcc-14" >> "$GITHUB_ENV" - echo "CXX=g++-14" >> "$GITHUB_ENV" - - - name: Configure - id: cmake_configure - run: | - cmake -B build \ - -G "Ninja" \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DGGML_BACKEND_DL=ON \ - -DGGML_CPU_ALL_VARIANTS=ON \ - -DGGML_VULKAN=ON - - - name: Build - id: cmake_build - run: | - time cmake --build build -j $(nproc) - - windows-latest: + windows: runs-on: windows-2025 env: @@ -202,7 +163,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-${{ matrix.build }} + key: cpu-windows-2025-${{ matrix.build }} variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -268,88 +229,3 @@ jobs: # cd build # $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 # & $sde -future -- ctest -L main -C Release --verbose --timeout 900 - - ubuntu-latest-cuda: - runs-on: ubuntu-latest - container: nvidia/cuda:12.6.2-devel-ubuntu24.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Install dependencies - env: - DEBIAN_FRONTEND: noninteractive - run: | - apt update - apt install -y cmake build-essential ninja-build libgomp1 git libssl-dev - - - name: ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: ubuntu-latest-cuda - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Build with CMake - # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project - run: | - cmake -S . -B build -G Ninja \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CUDA_ARCHITECTURES=89-real \ - -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ - -DGGML_NATIVE=OFF \ - -DGGML_CUDA=ON \ - -DGGML_CUDA_CUB_3DOT2=ON - cmake --build build - - windows-2022-cuda: - runs-on: windows-2022 - - strategy: - matrix: - cuda: ['12.4'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Install ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: windows-cuda-${{ matrix.cuda }} - variant: ccache - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Install Cuda Toolkit - uses: ./.github/actions/windows-setup-cuda - with: - cuda_version: ${{ matrix.cuda }} - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: Build - id: cmake_build - shell: cmd - # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project - run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 - cmake -S . -B build -G "Ninja Multi-Config" ^ - -DLLAMA_BUILD_SERVER=ON ^ - -DLLAMA_BUILD_BORINGSSL=ON ^ - -DGGML_NATIVE=OFF ^ - -DGGML_BACKEND_DL=ON ^ - -DGGML_CPU_ALL_VARIANTS=ON ^ - -DGGML_CUDA=ON ^ - -DGGML_RPC=ON ^ - -DGGML_CUDA_CUB_3DOT2=ON - set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 - cmake --build build --config Release -j %NINJA_JOBS% -t ggml - cmake --build build --config Release diff --git a/.github/workflows/build-hip.yml b/.github/workflows/build-cuda-ubuntu.yml similarity index 53% rename from .github/workflows/build-hip.yml rename to .github/workflows/build-cuda-ubuntu.yml index ff8283ae7..6271b22cb 100644 --- a/.github/workflows/build-hip.yml +++ b/.github/workflows/build-cuda-ubuntu.yml @@ -1,4 +1,4 @@ -name: CI (hip) +name: CI (CUDA, ubuntu) on: workflow_dispatch: # allows manual triggering @@ -6,7 +6,7 @@ on: branches: - master paths: [ - '.github/workflows/build-hip.yml', + '.github/workflows/build-cuda-ubuntu.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', @@ -20,7 +20,7 @@ on: pull_request: types: [opened, synchronize, reopened] paths: [ - '.github/workflows/build-hip.yml', + '.github/workflows/build-cuda-ubuntu.yml', 'ggml/src/ggml-cuda/**' ] @@ -36,8 +36,43 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: + cuda: + runs-on: ubuntu-24.04 + container: nvidia/cuda:12.6.2-devel-ubuntu24.04 - ubuntu-22-hip: + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Install dependencies + env: + DEBIAN_FRONTEND: noninteractive + run: | + apt update + apt install -y cmake build-essential ninja-build libgomp1 git libssl-dev + + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + key: cuda-ubuntu-24.04-cuda + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Build with CMake + # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project + run: | + cmake -S . -B build -G Ninja \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CUDA_ARCHITECTURES=89-real \ + -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ + -DGGML_NATIVE=OFF \ + -DGGML_CUDA=ON \ + -DGGML_CUDA_CUB_3DOT2=ON + cmake --build build + + hip: runs-on: ubuntu-22.04 container: rocm/dev-ubuntu-22.04:6.1.2 @@ -55,7 +90,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-22-hip + key: cuda-ubuntu-22.04-hip evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -69,75 +104,7 @@ jobs: -DGGML_HIP=ON cmake --build build --config Release -j $(nproc) - windows-latest-hip: - runs-on: windows-2022 - - env: - # Make sure this is in sync with build-cache.yml - HIPSDK_INSTALLER_VERSION: "26.Q1" - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: Grab rocWMMA package - id: grab_rocwmma - run: | - curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70201-81~24.04_amd64.deb" - 7z x rocwmma.deb - 7z x data.tar - - - name: Use ROCm Installation Cache - uses: actions/cache@v5 - id: cache-rocm - with: - path: C:\Program Files\AMD\ROCm - key: cache-gha-rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }} - - - name: Setup ROCm - if: steps.cache-rocm.outputs.cache-hit != 'true' - uses: ./.github/actions/windows-setup-rocm - with: - version: ${{ env.HIPSDK_INSTALLER_VERSION }} - - - name: Verify ROCm - id: verify - run: | - # Find and test ROCm installation - $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1 - if (-not $clangPath) { - Write-Error "ROCm installation not found" - exit 1 - } - & $clangPath.FullName --version - - - name: Install ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: ${{ github.job }} - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Build - id: cmake_build - run: | - $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) - $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" - cmake -G "Unix Makefiles" -B build -S . ` - -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` - -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` - -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.2.1/include/" ` - -DCMAKE_BUILD_TYPE=Release ` - -DLLAMA_BUILD_BORINGSSL=ON ` - -DROCM_DIR="${env:HIP_PATH}" ` - -DGGML_HIP=ON ` - -DGGML_HIP_ROCWMMA_FATTN=ON ` - -DGPU_TARGETS="gfx1100" ` - -DGGML_RPC=ON - cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - - ubuntu-22-musa: + musa: runs-on: ubuntu-22.04 container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64 @@ -155,7 +122,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-22-musa + key: cuda-ubuntu-22.04-musa evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-cuda-windows.yml b/.github/workflows/build-cuda-windows.yml new file mode 100644 index 000000000..631ff4ed2 --- /dev/null +++ b/.github/workflows/build-cuda-windows.yml @@ -0,0 +1,146 @@ +name: CI (CUDA, windows) + +# TODO: this workflow is only triggered manually because it is very heavy on the CI +# when we provision dedicated windows runners, we can enable it for pushes too +# note: running this workflow manually will populate the ccache for the release builds +# this can be used before merging a PR to speed up the release workflow +on: + workflow_dispatch: # allows manual triggering + +# note: this will run in queue with the release workflow +concurrency: + group: release + queue: max + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + LLAMA_ARG_LOG_COLORS: 1 + LLAMA_ARG_LOG_PREFIX: 1 + LLAMA_ARG_LOG_TIMESTAMPS: 1 + +jobs: + cuda: + runs-on: windows-2022 + + strategy: + matrix: + cuda: ['12.4', '13.3'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + key: release-windows-2022-x64-cuda-${{ matrix.cuda }} + append-timestamp: false # note: use this only with non-concurrent jobs! + + - name: Install Cuda Toolkit + uses: ./.github/actions/windows-setup-cuda + with: + cuda_version: ${{ matrix.cuda }} + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja + + - name: Build + id: cmake_build + shell: cmd + # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project + run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + cmake -S . -B build -G "Ninja Multi-Config" ^ + -DLLAMA_BUILD_SERVER=ON ^ + -DLLAMA_BUILD_BORINGSSL=ON ^ + -DGGML_NATIVE=OFF ^ + -DGGML_BACKEND_DL=ON ^ + -DGGML_CPU_ALL_VARIANTS=ON ^ + -DGGML_CUDA=ON ^ + -DGGML_RPC=ON ^ + -DGGML_CUDA_CUB_3DOT2=ON + set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 + cmake --build build --config Release -j %NINJA_JOBS% -t ggml + cmake --build build --config Release + + hip: + runs-on: windows-2022 + + env: + # Make sure this is in sync with build-cache.yml + HIPSDK_INSTALLER_VERSION: "26.Q1" + + strategy: + matrix: + include: + # sync with release.yml + - name: "radeon" + gpu_targets: "gfx1150;gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032" + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Grab rocWMMA package + id: grab_rocwmma + run: | + curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70201-81~24.04_amd64.deb" + 7z x rocwmma.deb + 7z x data.tar + + - name: Use ROCm Installation Cache + uses: actions/cache@v5 + id: cache-rocm + with: + path: C:\Program Files\AMD\ROCm + key: cache-gha-rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }} + + - name: Setup ROCm + if: steps.cache-rocm.outputs.cache-hit != 'true' + uses: ./.github/actions/windows-setup-rocm + with: + version: ${{ env.HIPSDK_INSTALLER_VERSION }} + + - name: Verify ROCm + id: verify + run: | + # Find and test ROCm installation + $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1 + if (-not $clangPath) { + Write-Error "ROCm installation not found" + exit 1 + } + & $clangPath.FullName --version + + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + # TODO: this build does not match the build in release.yml, so we use a different cache key + # ideally, the builds should match, similar to the CUDA build above so that we would be able + # to populate the ccache for the release with manual runs of this workflow + #key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} + key: cuda-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} + append-timestamp: false # note: use this only with non-concurrent jobs! + + - name: Build + id: cmake_build + run: | + $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) + $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" + cmake -G "Unix Makefiles" -B build -S . ` + -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` + -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` + -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.2.1/include/" ` + -DCMAKE_BUILD_TYPE=Release ` + -DLLAMA_BUILD_BORINGSSL=ON ` + -DROCM_DIR="${env:HIP_PATH}" ` + -DGGML_HIP=ON ` + -DGGML_HIP_ROCWMMA_FATTN=ON ` + -DGPU_TARGETS="gfx1100" ` + -DGGML_RPC=ON + cmake --build build -j ${env:NUMBER_OF_PROCESSORS} diff --git a/.github/workflows/build-msys.yml b/.github/workflows/build-msys.yml index 8214f2b8d..c2633c151 100644 --- a/.github/workflows/build-msys.yml +++ b/.github/workflows/build-msys.yml @@ -37,7 +37,7 @@ jobs: #- name: ccache # uses: ggml-org/ccache-action@v1.2.16 # with: - # key: windows-msys2 + # key: msys-windows-2025-x64 # variant: ccache # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-opencl.yml b/.github/workflows/build-opencl.yml index fccb06b88..251b1f8d5 100644 --- a/.github/workflows/build-opencl.yml +++ b/.github/workflows/build-opencl.yml @@ -35,8 +35,7 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - - windows-latest-opencl-adreno: + windows-2025-opencl-adreno: runs-on: windows-2025 steps: @@ -47,7 +46,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-llvm-arm64-opencl-adreno + key: opencl-windows-2025-x64 variant: ccache evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-openvino.yml b/.github/workflows/build-openvino.yml index 47e04869c..35a955f75 100644 --- a/.github/workflows/build-openvino.yml +++ b/.github/workflows/build-openvino.yml @@ -67,7 +67,7 @@ jobs: if: runner.environment == 'github-hosted' uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-openvino-${{ matrix.variant }}-no-preset-v1 + key: openvino-ubuntu-24.04-${{ matrix.variant }}-no-preset-v1 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-riscv.yml b/.github/workflows/build-riscv.yml index c12aaa61f..70615378b 100644 --- a/.github/workflows/build-riscv.yml +++ b/.github/workflows/build-riscv.yml @@ -69,7 +69,7 @@ jobs: #- name: ccache # uses: ggml-org/ccache-action@afde29e5b5422e5da23cb1f639e8baecadeadfc3 # https://github.com/ggml-org/ccache-action/pull/1 # with: - # key: ubuntu-cpu-riscv64-native + # key: riscv-ubuntu-native # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -139,7 +139,7 @@ jobs: #- name: ccache # uses: ggml-org/ccache-action@afde29e5b5422e5da23cb1f639e8baecadeadfc3 # https://github.com/ggml-org/ccache-action/pull/1 # with: - # key: ubuntu-riscv64-native-sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }} + # key: riscv-ubuntu-native-sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }} # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-rpc.yml b/.github/workflows/build-rpc.yml index c1ff98770..c060b649b 100644 --- a/.github/workflows/build-rpc.yml +++ b/.github/workflows/build-rpc.yml @@ -34,7 +34,6 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - ubuntu-latest-rpc: runs-on: ubuntu-latest diff --git a/.github/workflows/build-sanitize.yml b/.github/workflows/build-sanitize.yml index 29f7a2922..e242abcfd 100644 --- a/.github/workflows/build-sanitize.yml +++ b/.github/workflows/build-sanitize.yml @@ -41,19 +41,6 @@ jobs: id: checkout uses: actions/checkout@v6 - #- name: ccache - # uses: ggml-org/ccache-action@v1.2.21 - # with: - # key: ubuntu-latest-sanitizer-${{ matrix.sanitizer }} - # evict-old-files: 1d - # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - #- name: Dependencies - # id: depends - # run: | - # sudo apt-get update - # sudo apt-get install build-essential libssl-dev - # with UNDEFINED sanitizer, we have to build in Debug to avoid GCC 13 false-positive warnings - name: Build (undefined) id: cmake_build_undefined diff --git a/.github/workflows/build-self-hosted.yml b/.github/workflows/build-self-hosted.yml index 9ff470ea3..381cd3ce1 100644 --- a/.github/workflows/build-self-hosted.yml +++ b/.github/workflows/build-self-hosted.yml @@ -396,14 +396,6 @@ jobs: sudo apt-get update sudo apt-get install -y cmake - # note: sparing some ccache since these jobs run on dedicated runners that are not part of the organitzation - #- name: ccache - # uses: ggml-org/ccache-action@v1.2.21 - # with: - # key: arm64-cpu-kleidiai-graviton4 - # evict-old-files: 1d - # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - name: Test id: ggml-ci run: | diff --git a/.github/workflows/build-sycl.yml b/.github/workflows/build-sycl.yml index b0697f2f2..ef377c818 100644 --- a/.github/workflows/build-sycl.yml +++ b/.github/workflows/build-sycl.yml @@ -88,7 +88,7 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: ubuntu-24-sycl-${{ matrix.build }} +# key: sycl-ubuntu-24-${{ matrix.build }} # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} # @@ -150,7 +150,7 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: windows-latest-sycl +# key: sycl-windows-latest # variant: ccache # evict-old-files: 1d # save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-vulkan.yml b/.github/workflows/build-vulkan.yml index b44f08c6e..e6eab8fd0 100644 --- a/.github/workflows/build-vulkan.yml +++ b/.github/workflows/build-vulkan.yml @@ -36,7 +36,54 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - ubuntu-24-vulkan-llvmpipe: + ubuntu: + strategy: + matrix: + include: + - build: 'x64' + os: ubuntu-24.04 + - build: 'arm64' + os: ubuntu-24.04-arm + + runs-on: ${{ matrix.os }} + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.21 + with: + key: vulkan-${{ matrix.os }} + variant: ccache + evict-old-files: 1d + save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev spirv-headers libssl-dev ninja-build + echo "CC=gcc-14" >> "$GITHUB_ENV" + echo "CXX=g++-14" >> "$GITHUB_ENV" + + - name: Configure + id: cmake_configure + run: | + cmake -B build \ + -G "Ninja" \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DGGML_BACKEND_DL=ON \ + -DGGML_CPU_ALL_VARIANTS=ON \ + -DGGML_VULKAN=ON + + - name: Build + id: cmake_build + run: | + time cmake --build build -j $(nproc) + + ubuntu-llvmpipe: runs-on: ubuntu-24.04 steps: @@ -47,7 +94,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-vulkan-llvmpipe + key: vulkan-ubuntu-24.04-llvmpipe evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/build-webgpu.yml b/.github/workflows/build-webgpu.yml index c7056358c..1974511a9 100644 --- a/.github/workflows/build-webgpu.yml +++ b/.github/workflows/build-webgpu.yml @@ -35,7 +35,7 @@ env: LLAMA_ARG_LOG_TIMESTAMPS: 1 jobs: - macos-latest-webgpu: + macos: runs-on: macos-latest steps: @@ -46,7 +46,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-webgpu + key: webgpu-macos-latest evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -76,7 +76,7 @@ jobs: cd build ctest -L main --verbose --timeout 900 - ubuntu-24-webgpu: + ubuntu: runs-on: ubuntu-24.04 steps: @@ -87,7 +87,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-webgpu + key: webgpu-ubuntu-24.04 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -129,8 +129,16 @@ jobs: # test-backend-ops is too slow on llvmpipe, skip it ctest -L main -E test-backend-ops --verbose --timeout 900 - ubuntu-24-webgpu-wasm: - runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} + ubuntu-wasm: + strategy: + matrix: + include: + - build: 'x64' + os: ubuntu-24.04 + - build: 'arm64' + os: ubuntu-24.04-arm + + runs-on: ${{ matrix.os }} steps: - name: Clone @@ -140,7 +148,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-webgpu-wasm + key: webgpu-${{ matrix.os }}-wasm evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/hip-quality-check.yml b/.github/workflows/hip-quality-check.yml index 5d03b1772..14b9f41a6 100644 --- a/.github/workflows/hip-quality-check.yml +++ b/.github/workflows/hip-quality-check.yml @@ -50,7 +50,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-22-hip-quality-check + key: hip-quality-check-ubuntu-22.04 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 20b2dc915..c3a018425 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -27,18 +27,18 @@ on: '**/*.glsl' ] -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - env: BRANCH_NAME: ${{ github.head_ref || github.ref_name }} CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON" -jobs: +# note: run this workflow one at a time for better cache reuse +concurrency: + group: release + queue: max +jobs: check_release: - runs-on: [self-hosted, fast] + runs-on: ubuntu-slim outputs: should_release: ${{ steps.check.outputs.should_release }} @@ -100,8 +100,8 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: macos-latest-${{ matrix.arch }} - evict-old-files: 1d + key: release-${{ matrix.os }}-${{ matrix.arch }} + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Build id: cmake_build @@ -165,8 +165,8 @@ jobs: if: ${{ matrix.build != 's390x' }} uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-cpu-${{ matrix.build }} - evict-old-files: 1d + key: release-${{ matrix.os }}-cpu + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Dependencies id: depends @@ -241,8 +241,8 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-vulkan-${{ matrix.build }} - evict-old-files: 1d + key: release-${{ matrix.os }}-vulkan + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Dependencies id: depends @@ -311,11 +311,17 @@ jobs: cache: "npm" cache-dependency-path: "tools/ui/package-lock.json" - - name: ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: android-arm64 - evict-old-files: 1d + # note : disabled to spare some cache space (https://github.com/ggml-org/llama.cpp/pull/23789) + # for some reason, the ccache does not improve the build time in this case + # example: + # cache off: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78160400831 + # cache on: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78224189394 + # + #- name: ccache + # uses: ggml-org/ccache-action@v1.2.21 + # with: + # key: release-android-arm64 + # append-timestamp: false # note: use this only with non-concurrent jobs! - name: Set up JDK uses: actions/setup-java@v5 @@ -402,8 +408,8 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-24-openvino-release-no-preset-v1 - evict-old-files: 1d + key: release-ubuntu-24.04-openvino-release-no-preset-v1 + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Dependencies run: | @@ -485,9 +491,8 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-cpu-${{ matrix.arch }} - variant: ccache - evict-old-files: 1d + key: release-windows-2025-${{ matrix.arch }}-cpu + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Install Ninja run: | @@ -556,9 +561,8 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-${{ matrix.backend }}-${{ matrix.arch }} - variant: ccache - evict-old-files: 1d + key: release-windows-2025-${{ matrix.arch }}-${{ matrix.backend }} + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Install Vulkan SDK id: get_vulkan @@ -633,12 +637,11 @@ jobs: cache: "npm" cache-dependency-path: "tools/ui/package-lock.json" - - name: Install ccache + - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-cuda-${{ matrix.cuda }} - variant: ccache - evict-old-files: 1d + key: release-windows-2022-x64-cuda-${{ matrix.cuda }} + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Install Cuda Toolkit uses: ./.github/actions/windows-setup-cuda @@ -744,9 +747,8 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: windows-latest-sycl -# variant: ccache -# evict-old-files: 1d +# key: release-windows-2022-x64-sycl +# append-timestamp: false # note: use this only with non-concurrent jobs! # # - name: Build # id: cmake_build @@ -866,9 +868,8 @@ jobs: # - name: ccache # uses: ggml-org/ccache-action@v1.2.21 # with: -# key: ubuntu-24-sycl-${{ matrix.build }} -# evict-old-files: 1d -# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} +# key: release-ubuntu-24.04-sycl +# append-timestamp: false # note: use this only with non-concurrent jobs! # # - name: Build # id: cmake_build @@ -936,8 +937,8 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-rocm-${{ matrix.ROCM_VERSION }}-${{ matrix.build }} - evict-old-files: 1d + key: release-ubuntu-22.04-rocm-${{ matrix.ROCM_VERSION }} + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Dependencies id: depends @@ -1058,8 +1059,8 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: windows-latest-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}-x64 - evict-old-files: 1d + key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }} + append-timestamp: false # note: use this only with non-concurrent jobs! - name: Install ROCm if: steps.cache-rocm.outputs.cache-hit != 'true' diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index b30e33370..ffdc7c17d 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -55,7 +55,7 @@ concurrency: jobs: ubuntu: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 name: ubuntu (${{ matrix.wf_name }}) strategy: @@ -96,7 +96,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: server-ubuntu-default + key: server-ubuntu-24.04-x64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} @@ -144,7 +144,7 @@ jobs: - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: server-windows-default + key: server-windows-2025-x64 evict-old-files: 1d save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}