diff --git a/.github/ISSUE_TEMPLATE/01-bug-low.yml b/.github/ISSUE_TEMPLATE/01-bug-low.yml deleted file mode 100644 index 54785854..00000000 --- a/.github/ISSUE_TEMPLATE/01-bug-low.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Low Severity Bugs -description: Used to report low severity bugs in llama.cpp (e.g. cosmetic issues, non critical UI glitches) -title: "Bug: " -labels: ["bug-unconfirmed", "low severity"] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this bug report! - Please include information about your system, the steps to reproduce the bug, - and the version of llama.cpp that you are using. - If possible, please provide a minimal code example that reproduces the bug. - - type: textarea - id: what-happened - attributes: - label: What happened? - description: Also tell us, what did you expect to happen? - placeholder: Tell us what you see! - validations: - required: true - - type: textarea - id: version - attributes: - label: Name and Version - description: Which executable and which version of our software are you running? (use `--version` to get a version string) - placeholder: | - $./llama-cli --version - version: 2999 (42b4109e) - built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: What operating system are you seeing the problem on? - multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: false - - type: textarea - id: logs - attributes: - label: Relevant log output - description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. - render: shell diff --git a/.github/ISSUE_TEMPLATE/02-bug-medium.yml b/.github/ISSUE_TEMPLATE/02-bug-medium.yml deleted file mode 100644 index a6285c6f..00000000 --- a/.github/ISSUE_TEMPLATE/02-bug-medium.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Medium Severity Bug -description: Used to report medium severity bugs in llama.cpp (e.g. Malfunctioning Features but generally still useable) -title: "Bug: " -labels: ["bug-unconfirmed", "medium severity"] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this bug report! - Please include information about your system, the steps to reproduce the bug, - and the version of llama.cpp that you are using. - If possible, please provide a minimal code example that reproduces the bug. - - type: textarea - id: what-happened - attributes: - label: What happened? - description: Also tell us, what did you expect to happen? - placeholder: Tell us what you see! - validations: - required: true - - type: textarea - id: version - attributes: - label: Name and Version - description: Which executable and which version of our software are you running? (use `--version` to get a version string) - placeholder: | - $./llama-cli --version - version: 2999 (42b4109e) - built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: What operating system are you seeing the problem on? - multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: false - - type: textarea - id: logs - attributes: - label: Relevant log output - description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. - render: shell diff --git a/.github/ISSUE_TEMPLATE/03-bug-high.yml b/.github/ISSUE_TEMPLATE/03-bug-high.yml deleted file mode 100644 index ff816b93..00000000 --- a/.github/ISSUE_TEMPLATE/03-bug-high.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: High Severity Bug -description: Used to report high severity bugs in llama.cpp (e.g. Malfunctioning features hindering important common workflow) -title: "Bug: " -labels: ["bug-unconfirmed", "high severity"] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this bug report! - Please include information about your system, the steps to reproduce the bug, - and the version of llama.cpp that you are using. - If possible, please provide a minimal code example that reproduces the bug. - - type: textarea - id: what-happened - attributes: - label: What happened? - description: Also tell us, what did you expect to happen? - placeholder: Tell us what you see! - validations: - required: true - - type: textarea - id: version - attributes: - label: Name and Version - description: Which executable and which version of our software are you running? (use `--version` to get a version string) - placeholder: | - $./llama-cli --version - version: 2999 (42b4109e) - built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: What operating system are you seeing the problem on? - multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: false - - type: textarea - id: logs - attributes: - label: Relevant log output - description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. - render: shell diff --git a/.github/ISSUE_TEMPLATE/04-bug-critical.yml b/.github/ISSUE_TEMPLATE/04-bug-critical.yml deleted file mode 100644 index 7af42a80..00000000 --- a/.github/ISSUE_TEMPLATE/04-bug-critical.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Critical Severity Bug -description: Used to report critical severity bugs in llama.cpp (e.g. Crashing, Corrupted, Dataloss) -title: "Bug: " -labels: ["bug-unconfirmed", "critical severity"] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this bug report! - Please include information about your system, the steps to reproduce the bug, - and the version of llama.cpp that you are using. - If possible, please provide a minimal code example that reproduces the bug. - - type: textarea - id: what-happened - attributes: - label: What happened? - description: Also tell us, what did you expect to happen? - placeholder: Tell us what you see! - validations: - required: true - - type: textarea - id: version - attributes: - label: Name and Version - description: Which executable and which version of our software are you running? (use `--version` to get a version string) - placeholder: | - $./llama-cli --version - version: 2999 (42b4109e) - built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: What operating system are you seeing the problem on? - multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: false - - type: textarea - id: logs - attributes: - label: Relevant log output - description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. - render: shell diff --git a/.github/ISSUE_TEMPLATE/05-enhancement.yml b/.github/ISSUE_TEMPLATE/05-enhancement.yml deleted file mode 100644 index 58fca731..00000000 --- a/.github/ISSUE_TEMPLATE/05-enhancement.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Enhancement -description: Used to request enhancements for llama.cpp -title: "Feature Request: " -labels: ["enhancement"] -body: - - type: markdown - attributes: - value: | - [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas) - - - type: checkboxes - id: prerequisites - attributes: - label: Prerequisites - description: Please confirm the following before submitting your enhancement request. - options: - - label: I am running the latest code. Mention the version if possible as well. - required: true - - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md). - required: true - - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed). - required: true - - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share. - required: true - - - type: textarea - id: feature-description - attributes: - label: Feature Description - description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement. - placeholder: Detailed description of the enhancement - validations: - required: true - - - type: textarea - id: motivation - attributes: - label: Motivation - description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users. - placeholder: Explanation of why this feature is needed and its benefits - validations: - required: true - - - type: textarea - id: possible-implementation - attributes: - label: Possible Implementation - description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better. - placeholder: Detailed description of potential implementation - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/06-research.yml b/.github/ISSUE_TEMPLATE/06-research.yml deleted file mode 100644 index 3ae4e9f8..00000000 --- a/.github/ISSUE_TEMPLATE/06-research.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: Research -description: Track new technical research area -title: "Research: " -labels: ["research 🔬"] -body: - - type: markdown - attributes: - value: | - Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22) - - - type: checkboxes - id: research-stage - attributes: - label: Research Stage - description: Track general state of this research ticket - options: - - label: Background Research (Let's try to avoid reinventing the wheel) - - label: Hypothesis Formed (How do you think this will work and it's effect?) - - label: Strategy / Implementation Forming - - label: Analysis of results - - label: Debrief / Documentation (So people in the future can learn from us) - - - type: textarea - id: background - attributes: - label: Previous existing literature and research - description: Whats the current state of the art and whats the motivation for this research? - - - type: textarea - id: hypothesis - attributes: - label: Hypothesis - description: How do you think this will work and it's effect? - - - type: textarea - id: implementation - attributes: - label: Implementation - description: Got an approach? e.g. a PR ready to go? - - - type: textarea - id: analysis - attributes: - label: Analysis - description: How does the proposed implementation behave? - - - type: textarea - id: logs - attributes: - label: Relevant log output - description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. - render: shell diff --git a/.github/ISSUE_TEMPLATE/07-refactor.yml b/.github/ISSUE_TEMPLATE/07-refactor.yml deleted file mode 100644 index 3a68d3d5..00000000 --- a/.github/ISSUE_TEMPLATE/07-refactor.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Refactor (Maintainers) -description: Used to track refactoring opportunities -title: "Refactor: " -labels: ["refactor"] -body: - - type: markdown - attributes: - value: | - Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered. - Also you may want to check [Pull request refactor label as well](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too. - - - type: textarea - id: background-description - attributes: - label: Background Description - description: Please provide a detailed written description of the pain points you are trying to solve. - placeholder: Detailed description behind your motivation to request refactor - validations: - required: true - - - type: textarea - id: possible-approaches - attributes: - label: Possible Refactor Approaches - description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list. - placeholder: Your idea of possible refactoring opportunity/approaches - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index eb8c4b47..00000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,11 +0,0 @@ -blank_issues_enabled: true -contact_links: - - name: Got an idea? - url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas - about: Pop it there. It may then become an enhancement ticket. - - name: Got a question? - url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a - about: Ask a question there! - - name: Want to contribute? - url: https://github.com/ggerganov/llama.cpp/wiki/contribute - about: Head to the contribution guide page of the wiki for areas you can help with diff --git a/.github/labeler.yml b/.github/labeler.yml deleted file mode 100644 index 89436740..00000000 --- a/.github/labeler.yml +++ /dev/null @@ -1,91 +0,0 @@ -# https://github.com/actions/labeler -Kompute: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-kompute.h - - ggml/src/ggml-kompute.cpp - - README-kompute.md -Apple Metal: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-metal.h - - ggml/src/ggml-metal.cpp - - README-metal.md -SYCL: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-sycl.h - - ggml/src/ggml-sycl.cpp - - ggml/src/ggml-sycl/** - - docs/backend/SYCL.md - - examples/sycl/** -Nvidia GPU: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-cuda.h - - ggml/src/ggml-cuda/** -Vulkan: - - changed-files: - - any-glob-to-any-file: - - ggml/ggml_vk_generate_shaders.py - - ggml/src/ggml-vulkan* -documentation: - - changed-files: - - any-glob-to-any-file: - - docs/** - - media/** -testing: - - changed-files: - - any-glob-to-any-file: - - tests/** -build: - - changed-files: - - any-glob-to-any-file: - - cmake/** - - CMakeLists.txt - - CMakePresets.json -examples: - - changed-files: - - any-glob-to-any-file: examples/** -devops: - - changed-files: - - any-glob-to-any-file: - - .devops/** - - .github/** - - ci/** -python: - - changed-files: - - any-glob-to-any-file: - - "**/*.py" - - requirements/** - - gguf-py/** - - .flake8 -script: - - changed-files: - - any-glob-to-any-file: - - scripts/** -android: - - changed-files: - - any-glob-to-any-file: - - examples/llama.android/** -server: - - changed-files: - - any-glob-to-any-file: - - examples/server/** -ggml: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml*.h - - ggml/src/ggml*.c - - ggml/src/ggml*.cpp - - ggml/src/ggml*.h - - ggml-cuda/** -nix: - - changed-files: - - any-glob-to-any-file: - - "**/*.nix" - - .github/workflows/nix-*.yml - - .devops/nix/nixpkgs-instances.nix -embedding: - - changed-files: - - any-glob-to-any-file: examples/embedding/ diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index 997c6d9d..00000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,7 +0,0 @@ - - -- [x] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) -- Self-reported review complexity: - - [ ] Low - - [ ] Medium - - [ ] High diff --git a/.github/workflows/bench.yml.disabled b/.github/workflows/bench.yml.disabled deleted file mode 100644 index 1c8787ef..00000000 --- a/.github/workflows/bench.yml.disabled +++ /dev/null @@ -1,315 +0,0 @@ -# TODO: there have been some issues with the workflow, so disabling for now -# https://github.com/ggerganov/llama.cpp/issues/7893 -# -# Benchmark -name: Benchmark - -on: - workflow_dispatch: - inputs: - gpu-series: - description: 'Azure GPU series to run with' - required: true - type: choice - options: - - Standard_NC4as_T4_v3 - - Standard_NC24ads_A100_v4 - - Standard_NC80adis_H100_v5 - sha: - description: 'Commit SHA1 to build' - required: false - type: string - duration: - description: 'Duration of the bench' - type: string - default: 10m - - push: - branches: - - master - paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] - pull_request_target: - types: [opened, synchronize, reopened] - paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] - schedule: - - cron: '04 2 * * *' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }} - cancel-in-progress: true - -jobs: - bench-server-baseline: - runs-on: Standard_NC4as_T4_v3 - env: - RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it - N_USERS: 8 - DURATION: 10m - - strategy: - matrix: - model: [phi-2] - ftype: [q4_0, q8_0, f16] - include: - - model: phi-2 - ftype: q4_0 - pr_comment_enabled: "true" - - if: | - inputs.gpu-series == 'Standard_NC4as_T4_v3' - || ( - github.event_name == 'schedule' - && github.ref_name == 'master' - && github.repository_owner == 'ggerganov' - ) - || github.event_name == 'pull_request_target' - || ( - github.event_name == 'push' - && github.event.ref == 'refs/heads/master' - && github.repository_owner == 'ggerganov' - ) - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: Install python env - id: pipenv - run: | - cd examples/server/bench - python3 -m venv venv - source venv/bin/activate - pip install -r requirements.txt - - - name: Prometheus - id: install_prometheus - run: | - wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz - tar xzf prometheus*.tar.gz --strip-components=1 - ./prometheus --config.file=examples/server/bench/prometheus.yml & - while ! nc -z localhost 9090; do - sleep 0.1 - done - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: '1.21' - - - name: Install k6 and xk6-sse - id: k6_installation - run: | - cd examples/server/bench - go install go.k6.io/xk6/cmd/xk6@latest - xk6 build master \ - --with github.com/phymbert/xk6-sse - - - name: Build - id: cmake_build - run: | - set -eux - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ - -DLLAMA_CUBLAS=ON \ - -DCUDAToolkit_ROOT=/usr/local/cuda \ - -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ - -DCMAKE_CUDA_ARCHITECTURES=75 \ - -DLLAMA_FATAL_WARNINGS=OFF \ - -DLLAMA_ALL_WARNINGS=OFF \ - -DCMAKE_BUILD_TYPE=Release; - cmake --build build --config Release -j $(nproc) --target llama-server - - - name: Download the dataset - id: download_dataset - run: | - cd examples/server/bench - wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json - - - name: Server bench - id: server_bench - env: - HEAD_REF: ${{ github.head_ref || github.ref_name }} - run: | - set -eux - - cd examples/server/bench - source venv/bin/activate - python bench.py \ - --runner-label ${{ env.RUNNER_LABEL }} \ - --name ${{ github.job }} \ - --branch $HEAD_REF \ - --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \ - --scenario script.js \ - --duration ${{ github.event.inputs.duration || env.DURATION }} \ - --hf-repo ggml-org/models \ - --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \ - --model-path-prefix /models \ - --parallel ${{ env.N_USERS }} \ - -ngl 33 \ - --batch-size 2048 \ - --ubatch-size 256 \ - --ctx-size 16384 \ - --n-prompts 1000 \ - --max-prompt-tokens 1024 \ - --max-tokens 2048 - - cat results.github.env >> $GITHUB_ENV - - # Remove dataset as we do not want it in the artefact - rm ShareGPT_V3_unfiltered_cleaned_split.json - - - uses: actions/upload-artifact@v4 - with: - name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} - compression-level: 9 - path: | - examples/server/bench/*.jpg - examples/server/bench/*.json - examples/server/bench/*.log - - - name: Commit status - uses: Sibz/github-status-action@v1 - with: - authToken: ${{secrets.GITHUB_TOKEN}} - sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }} - context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} - description: | - ${{ env.BENCH_RESULTS }} - state: 'success' - - - name: Upload benchmark images - uses: devicons/public-upload-to-imgur@v2.2.2 - continue-on-error: true # Important as it looks unstable: 503 - id: imgur_step - with: - client_id: ${{secrets.IMGUR_CLIENT_ID}} - path: | - examples/server/bench/prompt_tokens_seconds.jpg - examples/server/bench/predicted_tokens_seconds.jpg - examples/server/bench/kv_cache_usage_ratio.jpg - examples/server/bench/requests_processing.jpg - - - name: Extract mermaid - id: set_mermaid - run: | - set -eux - - cd examples/server/bench - PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid) - echo "PROMPT_TOKENS_SECONDS<> $GITHUB_ENV - echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid) - echo "PREDICTED_TOKENS_SECONDS<> $GITHUB_ENV - echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid) - echo "KV_CACHE_USAGE_RATIO<> $GITHUB_ENV - echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - REQUESTS_PROCESSING=$(cat requests_processing.mermaid) - echo "REQUESTS_PROCESSING<> $GITHUB_ENV - echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - - name: Extract image url - id: extract_image_url - continue-on-error: true - run: | - set -eux - - echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV - echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV - echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV - echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV - - - name: Comment PR - uses: mshick/add-pr-comment@v2 - id: comment_pr - if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }} - with: - message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} - message: | -

- - 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 - -

- -
- - Expand details for performance related PR only - - - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }} - - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }} - - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s - - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s - - ${{ env.BENCH_GRAPH_XLABEL }} - - -

- - prompt_tokens_seconds - -

- - More - - ```mermaid - ${{ env.PROMPT_TOKENS_SECONDS }} - ``` - -
- - predicted_tokens_seconds - -
- More - - ```mermaid - ${{ env.PREDICTED_TOKENS_SECONDS }} - ``` - -
- -

- -
- - Details - -

- - kv_cache_usage_ratio - -

- More - - ```mermaid - ${{ env.KV_CACHE_USAGE_RATIO }} - ``` - -
- - requests_processing - -
- More - - ```mermaid - ${{ env.REQUESTS_PROCESSING }} - ``` - -
- -

-
-
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 423173b9..00000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,1389 +0,0 @@ -name: CI - -on: - workflow_dispatch: # allows manual triggering - inputs: - create_release: - description: 'Create new release' - required: true - type: boolean - push: - branches: - - master - paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal'] - pull_request: - types: [opened, synchronize, reopened] - paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal'] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - contents: write # for creating release - -env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - GGML_NLOOP: 3 - GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - -jobs: - macOS-latest-cmake-arm64: - runs-on: macos-14 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - mkdir build - cd build - cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF .. - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L 'main|curl' --verbose --timeout 900 - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - cp LICENSE ./build/bin/ - zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip - name: llama-bin-macos-arm64.zip - - macOS-latest-cmake-x64: - runs-on: macos-12 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - # Metal is disabled due to intermittent failures with Github runners not having a GPU: - # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 - cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - cp LICENSE ./build/bin/ - zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip - name: llama-bin-macos-x64.zip - - ubuntu-focal-make: - runs-on: ubuntu-20.04 - env: - LLAMA_NODE_AVAILABLE: true - LLAMA_PYTHON_AVAILABLE: true - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential gcc-8 - - - uses: actions/setup-node@v4 - with: - node-version: "20" - - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - - name: Build - id: make_build - env: - LLAMA_FATAL_WARNINGS: 1 - run: | - CC=gcc-8 make -j $(nproc) - - - name: Test - id: make_test - run: | - CC=gcc-8 make tests -j $(nproc) - make test -j $(nproc) - - ubuntu-focal-make-curl: - runs-on: ubuntu-20.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev - - - name: Build - id: make_build - env: - LLAMA_FATAL_WARNINGS: 1 - LLAMA_CURL: 1 - run: | - CC=gcc-8 make -j $(nproc) - - ubuntu-latest-cmake: - runs-on: ubuntu-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev - - - name: Build - id: cmake_build - run: | - mkdir build - cd build - cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF - cmake --build . --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L 'main|curl' --verbose --timeout 900 - - - name: Test llama2c conversion - id: llama2c_test - run: | - cd build - echo "Fetch tokenizer" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin - echo "Fetch llama2c model" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin - ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf - ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - cp LICENSE ./build/bin/ - zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip - name: llama-bin-ubuntu-x64.zip - - ubuntu-latest-cmake-sanitizer: - runs-on: ubuntu-latest - - continue-on-error: true - - strategy: - matrix: - sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug, Release] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential - - - name: Build - id: cmake_build - if: ${{ matrix.sanitizer != 'THREAD' }} - run: | - mkdir build - cd build - cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - cmake --build . --config ${{ matrix.build_type }} -j $(nproc) - - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - mkdir build - cd build - cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_OPENMP=OFF - cmake --build . --config ${{ matrix.build_type }} -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - ubuntu-latest-cmake-rpc: - runs-on: ubuntu-latest - - continue-on-error: true - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential - - - name: Build - id: cmake_build - run: | - mkdir build - cd build - cmake -DGGML_RPC=ON .. - cmake --build . --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose - - ubuntu-22-cmake-vulkan: - runs-on: ubuntu-22.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list - sudo apt-get update -y - sudo apt-get install -y build-essential vulkan-sdk - - - name: Build - id: cmake_build - run: | - mkdir build - cd build - cmake -DGGML_VULKAN=ON .. - cmake --build . --config Release -j $(nproc) - - ubuntu-22-cmake-hip: - runs-on: ubuntu-22.04 - container: rocm/dev-ubuntu-22.04:6.0.2 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev - - - name: Build with native CMake HIP support - id: cmake_build - run: | - cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIPBLAS=ON - cmake --build build --config Release -j $(nproc) - - - name: Build with legacy HIP support - id: cmake_build_legacy_hip - run: | - cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON - cmake --build build2 --config Release -j $(nproc) - - ubuntu-22-cmake-sycl: - runs-on: ubuntu-22.04 - - continue-on-error: true - - steps: - - uses: actions/checkout@v4 - - - name: add oneAPI to apt - shell: bash - run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - - - name: install oneAPI dpcpp compiler - shell: bash - run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp - - - name: install oneAPI MKL library - shell: bash - run: | - sudo apt install intel-oneapi-mkl-devel - - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Build - id: cmake_build - run: | - source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. - cmake --build . --config Release -j $(nproc) - - ubuntu-22-cmake-sycl-fp16: - runs-on: ubuntu-22.04 - - continue-on-error: true - - steps: - - uses: actions/checkout@v4 - - - name: add oneAPI to apt - shell: bash - run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - - - name: install oneAPI dpcpp compiler - shell: bash - run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp - - - name: install oneAPI MKL library - shell: bash - run: | - sudo apt install intel-oneapi-mkl-devel - - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Build - id: cmake_build - run: | - source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON .. - cmake --build . --config Release -j $(nproc) - - # TODO: build with GGML_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know - # how to debug it. - # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124 - macOS-latest-make: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: make_build - env: - LLAMA_FATAL_WARNINGS: 1 - run: | - GGML_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu) - - - name: Test - id: make_test - run: | - GGML_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu) - GGML_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu) - - # TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know - # how to debug it. - # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584 - # would be great if we fix these - macOS-latest-cmake: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - mkdir build - cd build - cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF .. - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - macOS-latest-cmake-ios: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-cmake-tvos: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=tvOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-swift: - runs-on: macos-latest - - strategy: - matrix: - destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: xcodebuild for swift package - id: xcodebuild - run: | - xcodebuild -scheme llama -destination "${{ matrix.destination }}" - - - name: Build Swift Example - id: make_build_swift_example - run: | - make swift - - windows-msys2: - runs-on: windows-latest - - strategy: - fail-fast: false - matrix: - include: - - { sys: UCRT64, env: ucrt-x86_64, build: Release } - - { sys: CLANG64, env: clang-x86_64, build: Release } - - steps: - - name: Clone - uses: actions/checkout@v4 - - - name: Setup ${{ matrix.sys }} - uses: msys2/setup-msys2@v2 - with: - update: true - msystem: ${{matrix.sys}} - install: >- - base-devel - mingw-w64-${{matrix.env}}-toolchain - mingw-w64-${{matrix.env}}-cmake - mingw-w64-${{matrix.env}}-openblas - - - name: Build using make - shell: msys2 {0} - run: | - make -j $(nproc) - - - name: Clean after building using make - shell: msys2 {0} - run: | - make clean - - - name: Build using make w/ OpenBLAS - shell: msys2 {0} - run: | - make GGML_OPENBLAS=1 -j $(nproc) - - - name: Build using CMake - shell: msys2 {0} - run: | - cmake -B build - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - - name: Clean after building using CMake - shell: msys2 {0} - run: | - rm -rf build - - - name: Build using CMake w/ OpenBLAS - shell: msys2 {0} - run: | - cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - windows-latest-cmake: - runs-on: windows-2019 - - env: - OPENBLAS_VERSION: 0.3.23 - SDE_VERSION: 9.33.0-2024-01-07 - VULKAN_VERSION: 1.3.261.1 - - strategy: - matrix: - include: - - build: 'noavx-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON' - - build: 'avx2-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON' - - build: 'avx-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON' - - build: 'avx512-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON' - - build: 'openblas-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' - - build: 'kompute-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON' - - build: 'vulkan-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON' - - build: 'llvm-arm64' - defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON' - - build: 'msvc-arm64' - defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON' - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Clone Kompute submodule - id: clone_kompute - if: ${{ matrix.build == 'kompute-x64' }} - run: | - git submodule update --init ggml/src/kompute - - - name: Download OpenBLAS - id: get_openblas - if: ${{ matrix.build == 'openblas-x64' }} - run: | - curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip" - curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE" - mkdir $env:RUNNER_TEMP/openblas - tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas - $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) - $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) - $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe') - & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll - - - name: Install Vulkan SDK - id: get_vulkan - if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }} - run: | - curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" - & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install - Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" - Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: Build - id: cmake_build - run: | - cmake -S . -B build ${{ matrix.defines }} - cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} - - - name: Add libopenblas.dll - id: add_libopenblas_dll - if: ${{ matrix.build == 'openblas-x64' }} - run: | - cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll - cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt - - - name: Check AVX512F support - id: check_avx512f - if: ${{ matrix.build == 'avx512-x64' }} - continue-on-error: true - run: | - cd build - $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) - $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) - $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe') - echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c - & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main - .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO" - - - name: Test - id: cmake_test - # not all machines have native AVX-512 - if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }} - run: | - cd build - ctest -L main -C Release --verbose --timeout 900 - - - name: Test (Intel SDE) - id: cmake_test_sde - if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation - run: | - curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz" - # for some weird reason windows tar doesn't like sde tar.xz - 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz - 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar - $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) - cd build - $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 - & $sde -future -- ctest -L main -C Release --verbose --timeout 900 - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt - 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip - name: llama-bin-win-${{ matrix.build }}.zip - - windows-latest-cmake-cuda: - runs-on: windows-2019 - - strategy: - matrix: - cuda: ['12.2.0', '11.7.1'] - build: ['cuda'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install CUDA toolkit - id: cuda-toolkit - uses: Jimver/cuda-toolkit@v0.2.15 - with: - cuda: ${{ matrix.cuda }} - method: 'network' - sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]' - - - name: Build - id: cmake_build - run: | - mkdir build - cd build - cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON - cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) -t ggml - cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip - name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip - - - name: Copy and pack Cuda runtime - run: | - echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}" - $dst='.\build\bin\cudart\' - robocopy "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll - 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\* - - - name: Upload Cuda runtime - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip - name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip - - windows-latest-cmake-sycl: - runs-on: windows-latest - - defaults: - run: - shell: bash - - env: - WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595_offline.exe - WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel - ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI" - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install - run: scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL - - - name: Build - id: cmake_build - run: examples/sycl/win-build-sycl.bat - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin" - cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.4.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin - - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_win_proxy_loader.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_level_zero.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin - echo "cp oneAPI running time dll files to ./build/bin done" - 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip - name: llama-bin-win-sycl-x64.zip - - windows-latest-cmake-hip: - if: ${{ github.event.inputs.create_release != 'true' }} - runs-on: windows-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Install - id: depends - run: | - $ErrorActionPreference = "Stop" - write-host "Downloading AMD HIP SDK Installer" - Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" - write-host "Installing AMD HIP SDK" - Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait - write-host "Completed AMD HIP SDK installation" - - - name: Verify ROCm - id: verify - run: | - & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - - - name: Build - id: cmake_build - run: | - $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) - $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" - cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON - cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - - windows-latest-cmake-hip-release: - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - runs-on: windows-latest - - strategy: - matrix: - gpu_target: [gfx1100, gfx1101, gfx1030] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Install - id: depends - run: | - $ErrorActionPreference = "Stop" - write-host "Downloading AMD HIP SDK Installer" - Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" - write-host "Installing AMD HIP SDK" - Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait - write-host "Completed AMD HIP SDK installation" - - - name: Verify ROCm - id: verify - run: | - & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - - - name: Build - id: cmake_build - run: | - $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) - $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" - cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON - cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - md "build\bin\rocblas\library\" - cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" - cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\" - cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\" - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - run: | - 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\* - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip - name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip - - ios-xcode-build: - runs-on: macos-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Build Xcode project - run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build - - android-build: - runs-on: ubuntu-latest - - steps: - - name: Clone - uses: actions/checkout@v4 - - - name: Set up JDK - uses: actions/setup-java@v3 - with: - java-version: 17 - distribution: zulu - - - name: Setup Android SDK - uses: android-actions/setup-android@v3 - with: - log-accepted-android-sdk-licenses: false - - - name: Build - run: | - cd examples/llama.android - - ./gradlew build --no-daemon - -# freeBSD-latest: -# runs-on: macos-12 -# steps: -# - name: Clone -# uses: actions/checkout@v4 -# -# - name: Build -# uses: cross-platform-actions/action@v0.19.0 -# with: -# operating_system: freebsd -# version: '13.2' -# hypervisor: 'qemu' -# run: | -# sudo pkg update -# sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas -# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu` - - release: - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - - runs-on: ubuntu-latest - - needs: - - ubuntu-focal-make - - ubuntu-latest-cmake - - macOS-latest-make - - macOS-latest-cmake - - windows-latest-cmake - - windows-latest-cmake-cuda - - windows-latest-cmake-hip-release - - macOS-latest-cmake-arm64 - - macOS-latest-cmake-x64 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Download artifacts - id: download-artifact - uses: actions/download-artifact@v4 - with: - path: ./artifact - - - name: Move artifacts - id: move_artifacts - run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release - - - name: Create release - id: create_release - uses: anzz1/action-create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ steps.tag.outputs.name }} - - - name: Upload release - id: upload_release - uses: actions/github-script@v3 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - script: | - const path = require('path'); - const fs = require('fs'); - const release_id = '${{ steps.create_release.outputs.id }}'; - for (let file of await fs.readdirSync('./artifact/release')) { - if (path.extname(file) === '.zip') { - console.log('uploadReleaseAsset', file); - await github.repos.uploadReleaseAsset({ - owner: context.repo.owner, - repo: context.repo.repo, - release_id: release_id, - name: file, - data: await fs.readFileSync(`./artifact/release/${file}`) - }); - } - } - -# ubuntu-latest-gcc: -# runs-on: ubuntu-latest -# -# strategy: -# matrix: -# build: [Debug, Release] -# -# steps: -# - name: Clone -# uses: actions/checkout@v4 -# -# - name: Dependencies -# run: | -# sudo apt-get update -# sudo apt-get install build-essential -# sudo apt-get install cmake -# -# - name: Configure -# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -# -# - name: Build -# run: | -# make -# -# ubuntu-latest-clang: -# runs-on: ubuntu-latest -# -# strategy: -# matrix: -# build: [Debug, Release] -# -# steps: -# - name: Clone -# uses: actions/checkout@v4 -# -# - name: Dependencies -# run: | -# sudo apt-get update -# sudo apt-get install build-essential -# sudo apt-get install cmake -# -# - name: Configure -# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -# -# - name: Build -# run: | -# make -# -# ubuntu-latest-gcc-sanitized: -# runs-on: ubuntu-latest -# -# strategy: -# matrix: -# sanitizer: [ADDRESS, THREAD, UNDEFINED] -# -# steps: -# - name: Clone -# uses: actions/checkout@v4 -# -# - name: Dependencies -# run: | -# sudo apt-get update -# sudo apt-get install build-essential -# sudo apt-get install cmake -# -# - name: Configure -# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -# -# - name: Build -# run: | -# make -# -# windows: -# runs-on: windows-latest -# -# strategy: -# matrix: -# build: [Release] -# arch: [Win32, x64] -# include: -# - arch: Win32 -# s2arc: x86 -# - arch: x64 -# s2arc: x64 -# -# steps: -# - name: Clone -# uses: actions/checkout@v4 -# -# - name: Add msbuild to PATH -# uses: microsoft/setup-msbuild@v1 -# -# - name: Configure -# run: > -# cmake -S . -B ./build -A ${{ matrix.arch }} -# -DCMAKE_BUILD_TYPE=${{ matrix.build }} -# -# - name: Build -# run: | -# cd ./build -# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} -# -# - name: Upload binaries -# uses: actions/upload-artifact@v4 -# with: -# name: llama-bin-${{ matrix.arch }} -# path: build/bin/${{ matrix.build }} -# -# windows-blas: -# runs-on: windows-latest -# -# strategy: -# matrix: -# build: [Release] -# arch: [Win32, x64] -# blas: [ON] -# include: -# - arch: Win32 -# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip -# s2arc: x86 -# - arch: x64 -# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip -# s2arc: x64 -# -# steps: -# - name: Clone -# uses: actions/checkout@v4 -# -# - name: Add msbuild to PATH -# uses: microsoft/setup-msbuild@v1 -# -# - name: Fetch OpenBLAS -# if: matrix.blas == 'ON' -# run: | -# C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }} -# 7z x blas.zip -oblas -y -# copy blas/include/cblas.h . -# copy blas/include/openblas_config.h . -# echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV -# -# - name: Configure -# run: > -# cmake -S . -B ./build -A ${{ matrix.arch }} -# -DCMAKE_BUILD_TYPE=${{ matrix.build }} -# -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }} -# -DCMAKE_LIBRARY_PATH="$env:blasdir/lib" -# -# - name: Build -# run: | -# cd ./build -# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} -# -# - name: Copy libopenblas.dll -# if: matrix.blas == 'ON' -# run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }} -# -# - name: Upload binaries -# if: matrix.blas == 'ON' -# uses: actions/upload-artifact@v4 -# with: -# name: llama-blas-bin-${{ matrix.arch }} -# path: build/bin/${{ matrix.build }} -# -# emscripten: -# runs-on: ubuntu-latest -# -# strategy: -# matrix: -# build: [Release] -# -# steps: -# - name: Clone -# uses: actions/checkout@v4 -# -# - name: Dependencies -# run: | -# wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz -# tar -xvf master.tar.gz -# emsdk-master/emsdk update -# emsdk-master/emsdk install latest -# emsdk-master/emsdk activate latest -# -# - name: Configure -# run: echo "tmp" -# -# - name: Build -# run: | -# pushd emsdk-master -# source ./emsdk_env.sh -# popd -# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -# make diff --git a/.github/workflows/close-issue.yml b/.github/workflows/close-issue.yml deleted file mode 100644 index f63860d1..00000000 --- a/.github/workflows/close-issue.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Close inactive issues -on: - schedule: - - cron: "42 0 * * *" - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - issues: write - -jobs: - close-issues: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@v5 - with: - exempt-issue-labels: "refactor,help wanted,good first issue,research,bug" - days-before-issue-stale: 30 - days-before-issue-close: 14 - stale-issue-label: "stale" - close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." - days-before-pr-stale: -1 - days-before-pr-close: -1 - operations-per-run: 10000 - repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml deleted file mode 100644 index a4ac9b21..00000000 --- a/.github/workflows/docker.yml +++ /dev/null @@ -1,125 +0,0 @@ -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - -# GitHub recommends pinning actions to a commit SHA. -# To get a newer version, you will need to update the SHA. -# You can also reference a tag or branch, but the action may change without warning. - -name: Publish Docker image - -on: - #pull_request: - push: - branches: - - master - paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal'] - workflow_dispatch: # allows manual triggering, useful for debugging - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - packages: write - -jobs: - push_to_registry: - name: Push Docker image to Docker Hub - #if: github.event.pull_request.draft == false - - runs-on: ubuntu-latest - env: - COMMIT_SHA: ${{ github.sha }} - strategy: - matrix: - config: - - { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" } - - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" } - - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" } - # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete - #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } - #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } - #- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } - - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" } - - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" } - steps: - - name: Check out the repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 # preserve git history, so we can determine the build number - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Log in to Docker Hub - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case - REPO_NAME="${{ github.event.repository.name }}" - - # determine tag name postfix (build number, commit hash) - if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then - TAG_POSTFIX="b${BUILD_NUMBER}" - else - SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-') - TAG_POSTFIX="${SAFE_NAME}-${SHORT_HASH}" - fi - - # list all tags possible - TAGS="" - TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}," - TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}" - - echo "output_tags=$TAGS" >> $GITHUB_OUTPUT - echo "output_tags=$TAGS" # print out for debugging - env: - GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' - - # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: true - swap-storage: true - - - name: Build and push Docker image (tagged + versioned) - if: github.event_name == 'push' - uses: docker/build-push-action@v6 - with: - context: . - push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.output_tags }} - file: ${{ matrix.config.dockerfile }} diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml deleted file mode 100644 index ae86e992..00000000 --- a/.github/workflows/editorconfig.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: EditorConfig Checker - -on: - workflow_dispatch: # allows manual triggering - inputs: - create_release: - description: 'Create new release' - required: true - type: boolean - push: - branches: - - master - pull_request: - branches: - - master - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - editorconfig: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: editorconfig-checker/action-editorconfig-checker@main - - run: editorconfig-checker diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml deleted file mode 100644 index 3ca4d305..00000000 --- a/.github/workflows/gguf-publish.yml +++ /dev/null @@ -1,44 +0,0 @@ -# This workflow will upload a Python Package using Twine when a GGUF release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -# See `gguf-py/README.md` for how to make a release. - -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - -name: Upload Python Package - -on: - workflow_dispatch: - push: - # Pattern matched against refs/tags - tags: - - 'gguf-v*' # Push events to every version tag - - -jobs: - deploy: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.9.x' - - name: Install dependencies - run: | - cd gguf-py - python -m pip install poetry - poetry install - - - name: Build package - run: cd gguf-py && poetry build - - name: Publish package - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} - packages-dir: gguf-py/dist diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 index 368dbdbe..00000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: "Pull Request Labeler" -on: -- pull_request_target - -jobs: - labeler: - permissions: - contents: read - pull-requests: write - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - repository: "ggerganov/llama.cpp" - - uses: actions/labeler@v5 - with: - configuration-path: '.github/labeler.yml' diff --git a/.github/workflows/nix-ci-aarch64.yml b/.github/workflows/nix-ci-aarch64.yml deleted file mode 100644 index 0da6acdf..00000000 --- a/.github/workflows/nix-ci-aarch64.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: Nix aarch64 builds - -on: - workflow_dispatch: # allows manual triggering - schedule: - # Rebuild daily rather than on every push because QEMU is expensive (e.g. - # 1.5h instead of minutes with the cold cache). - # - # randint(0, 59), randint(0, 23) - - cron: '26 12 * * *' - # But also rebuild if we touched any of the Nix expressions: - push: - branches: - - master - paths: ['**/*.nix', 'flake.lock'] - pull_request: - types: [opened, synchronize, reopened] - paths: ['**/*.nix', 'flake.lock'] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub - id-token: write - contents: read - -jobs: - nix-build-aarch64: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Install QEMU - # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654 - run: | - sudo apt-get update - sudo apt-get install -y qemu-user-static qemu-system-aarch64 - sudo usermod -a -G kvm $USER - - name: Install Nix - uses: DeterminateSystems/nix-installer-action@v9 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - extra-conf: | - extra-platforms = aarch64-linux - extra-system-features = nixos-test kvm - extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org - extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E= - - uses: DeterminateSystems/magic-nix-cache-action@v2 - with: - upstream-cache: https://${{ matrix.cachixName }}.cachix.org - - name: Set-up cachix to push the results to - uses: cachix/cachix-action@v13 - with: - authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}' - name: llama-cpp - - name: Show all output paths - run: > - nix run github:nix-community/nix-eval-jobs - -- --gc-roots-dir gcroot - --flake - ".#packages.aarch64-linux" - - name: Build - run: > - nix run github:Mic92/nix-fast-build - -- --skip-cached --no-nom - --systems aarch64-linux - --flake - ".#checks.aarch64-linux" diff --git a/.github/workflows/nix-ci.yml b/.github/workflows/nix-ci.yml deleted file mode 100644 index 8ecbbe53..00000000 --- a/.github/workflows/nix-ci.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: Nix CI - -on: - workflow_dispatch: # allows manual triggering - push: - branches: - - master - pull_request: - types: [opened, synchronize, reopened] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub - id-token: write - contents: read - -jobs: - nix-eval: - strategy: - fail-fast: false - matrix: - os: [ ubuntu-latest, macos-latest ] - runs-on: ${{ matrix.os }} - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Install Nix - uses: DeterminateSystems/nix-installer-action@v9 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - extra-conf: | - extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org - extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E= - - uses: DeterminateSystems/magic-nix-cache-action@v2 - with: - upstream-cache: https://${{ matrix.cachixName }}.cachix.org - - name: List all flake outputs - run: nix flake show --all-systems - - name: Show all output paths - run: > - nix run github:nix-community/nix-eval-jobs - -- --gc-roots-dir gcroot - --flake - ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)" - nix-build: - strategy: - fail-fast: false - matrix: - os: [ ubuntu-latest, macos-latest ] - runs-on: ${{ matrix.os }} - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Install Nix - uses: DeterminateSystems/nix-installer-action@v9 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - extra-conf: | - extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org - extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E= - - uses: DeterminateSystems/magic-nix-cache-action@v2 - with: - upstream-cache: https://${{ matrix.cachixName }}.cachix.org - - name: Set-up cachix to push the results to - uses: cachix/cachix-action@v13 - with: - authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}' - name: llama-cpp - - name: Build - run: > - nix run github:Mic92/nix-fast-build - -- --skip-cached --no-nom - --flake - ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)" diff --git a/.github/workflows/nix-flake-update.yml b/.github/workflows/nix-flake-update.yml deleted file mode 100644 index 3a6a96e2..00000000 --- a/.github/workflows/nix-flake-update.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: update-flake-lock -on: - workflow_dispatch: - schedule: - - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00 - -jobs: - lockfile: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Install Nix - uses: DeterminateSystems/nix-installer-action@main - - name: Update flake.lock - uses: DeterminateSystems/update-flake-lock@main - with: - pr-title: "nix: update flake.lock" - pr-labels: | - nix - pr-reviewers: philiptaron,SomeoneSerge - token: ${{ secrets.FLAKE_TOKEN }} diff --git a/.github/workflows/nix-publish-flake.yml b/.github/workflows/nix-publish-flake.yml deleted file mode 100644 index 2c3c1ebd..00000000 --- a/.github/workflows/nix-publish-flake.yml +++ /dev/null @@ -1,36 +0,0 @@ -# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes -name: "Publish a flake to flakestry & flakehub" -on: - push: - tags: - - "*" - workflow_dispatch: - inputs: - tag: - description: "The existing tag to publish" - type: "string" - required: true -jobs: - flakestry-publish: - runs-on: ubuntu-latest - permissions: - id-token: "write" - contents: "read" - steps: - - uses: flakestry/flakestry-publish@main - with: - version: "${{ inputs.tag || github.ref_name }}" - flakehub-publish: - runs-on: "ubuntu-latest" - permissions: - id-token: "write" - contents: "read" - steps: - - uses: "actions/checkout@v4" - with: - ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}" - - uses: "DeterminateSystems/nix-installer-action@main" - - uses: "DeterminateSystems/flakehub-push@main" - with: - visibility: "public" - tag: "${{ inputs.tag }}" diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml deleted file mode 100644 index 46e80aec..00000000 --- a/.github/workflows/python-check-requirements.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Python check requirements.txt - -on: - push: - paths: - - '.github/workflows/python-check-requirements.yml' - - 'scripts/check-requirements.sh' - - 'convert*.py' - - '**/requirements*.txt' - pull_request: - paths: - - '.github/workflows/python-check-requirements.yml' - - 'scripts/check-requirements.sh' - - 'convert*.py' - - '**/requirements*.txt' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - python-check-requirements: - runs-on: ubuntu-latest - name: check-requirements - steps: - - name: Check out source repository - uses: actions/checkout@v4 - - name: Set up Python environment - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Run check-requirements.sh script - run: bash scripts/check-requirements.sh diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml deleted file mode 100644 index a8d46f31..00000000 --- a/.github/workflows/python-lint.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: flake8 Lint - -on: [push, pull_request] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - flake8-lint: - runs-on: ubuntu-latest - name: Lint - steps: - - name: Check out source repository - uses: actions/checkout@v4 - - name: Set up Python environment - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: flake8 Lint - uses: py-actions/flake8@v2 - with: - plugins: "flake8-no-print" diff --git a/.github/workflows/python-type-check.yml b/.github/workflows/python-type-check.yml deleted file mode 100644 index 373bb601..00000000 --- a/.github/workflows/python-type-check.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Python Type-Check - -on: - push: - paths: - - '.github/workflows/python-type-check.yml' - - 'pyrightconfig.json' - - '**.py' - - '**/requirements*.txt' - pull_request: - paths: - - '.github/workflows/python-type-check.yml' - - 'pyrightconfig.json' - - '**.py' - - '**/requirements*.txt' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - python-type-check: - runs-on: ubuntu-latest - name: pyright type-check - steps: - - name: Check out source repository - uses: actions/checkout@v4 - - name: Set up Python environment - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install Python dependencies - # TODO: use a venv - run: pip install -r requirements/requirements-all.txt - - name: Type-check with Pyright - uses: jakebailey/pyright-action@v2 - with: - version: 1.1.382 - level: warning - warnings: true diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml deleted file mode 100644 index 699ac095..00000000 --- a/.github/workflows/server.yml +++ /dev/null @@ -1,190 +0,0 @@ -# Server build and tests -name: Server - -on: - workflow_dispatch: # allows manual triggering - inputs: - sha: - description: 'Commit SHA1 to build' - required: false - type: string - slow_tests: - description: 'Run slow tests' - required: true - type: boolean - push: - branches: - - master - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] - pull_request: - types: [opened, synchronize, reopened] - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] - -env: - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - LLAMA_LOG_VERBOSITY: 10 - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - server: - runs-on: ubuntu-latest - - strategy: - matrix: - sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken - build_type: [RelWithDebInfo] - include: - - build_type: Release - sanitizer: "" - fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken - - steps: - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get -y install \ - build-essential \ - xxd \ - git \ - cmake \ - curl \ - wget \ - language-pack-en \ - libcurl4-openssl-dev - - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: Python setup - id: setup_python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Tests dependencies - id: test_dependencies - run: | - pip install -r examples/server/tests/requirements.txt - - - name: Verify server deps - id: verify_server_deps - run: | - git config --global --add safe.directory $(realpath .) - cd examples/server - git ls-files --others --modified - git status - ./deps.sh - git status - not_ignored_files="$(git ls-files --others --modified)" - echo "Modified files: ${not_ignored_files}" - if [ -n "${not_ignored_files}" ]; then - echo "Repository is dirty or server deps are not built as expected" - echo "${not_ignored_files}" - exit 1 - fi - - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DGGML_OPENMP=OFF ; - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - - - name: Build - id: cmake_build - if: ${{ matrix.sanitizer != 'THREAD' }} - run: | - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - - - name: Tests - id: server_integration_tests - run: | - cd examples/server/tests - PORT=8888 ./tests.sh - - - name: Slow tests - id: server_integration_tests_slow - if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} - run: | - cd examples/server/tests - PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow - - - server-windows: - runs-on: windows-2019 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: libCURL - id: get_libcurl - env: - CURL_VERSION: 8.6.0_6 - run: | - curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip" - mkdir $env:RUNNER_TEMP/libcurl - tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl - - - name: Build - id: cmake_build - run: | - cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" - cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server - - - name: Python setup - id: setup_python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Tests dependencies - id: test_dependencies - run: | - pip install -r examples/server/tests/requirements.txt - - - name: Copy Libcurl - id: prepare_libcurl - run: | - cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll - - - name: Tests - id: server_integration_tests - if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} - run: | - cd examples/server/tests - $env:PYTHONIOENCODING = ":replace" - behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp - - - name: Slow tests - id: server_integration_tests_slow - if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} - run: | - cd examples/server/tests - behave.exe --stop --no-skipped --no-capture --tags slow