mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-08 00:59:02 +00:00
delete .github
This commit is contained in:
parent
c97ea10617
commit
5685cb87ed
25 changed files with 0 additions and 2880 deletions
50
.github/ISSUE_TEMPLATE/01-bug-low.yml
vendored
50
.github/ISSUE_TEMPLATE/01-bug-low.yml
vendored
|
@ -1,50 +0,0 @@
|
|||
name: Low Severity Bugs
|
||||
description: Used to report low severity bugs in llama.cpp (e.g. cosmetic issues, non critical UI glitches)
|
||||
title: "Bug: "
|
||||
labels: ["bug-unconfirmed", "low severity"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
Please include information about your system, the steps to reproduce the bug,
|
||||
and the version of llama.cpp that you are using.
|
||||
If possible, please provide a minimal code example that reproduces the bug.
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What happened?
|
||||
description: Also tell us, what did you expect to happen?
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: What operating system are you seeing the problem on?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
50
.github/ISSUE_TEMPLATE/02-bug-medium.yml
vendored
50
.github/ISSUE_TEMPLATE/02-bug-medium.yml
vendored
|
@ -1,50 +0,0 @@
|
|||
name: Medium Severity Bug
|
||||
description: Used to report medium severity bugs in llama.cpp (e.g. Malfunctioning Features but generally still useable)
|
||||
title: "Bug: "
|
||||
labels: ["bug-unconfirmed", "medium severity"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
Please include information about your system, the steps to reproduce the bug,
|
||||
and the version of llama.cpp that you are using.
|
||||
If possible, please provide a minimal code example that reproduces the bug.
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What happened?
|
||||
description: Also tell us, what did you expect to happen?
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: What operating system are you seeing the problem on?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
50
.github/ISSUE_TEMPLATE/03-bug-high.yml
vendored
50
.github/ISSUE_TEMPLATE/03-bug-high.yml
vendored
|
@ -1,50 +0,0 @@
|
|||
name: High Severity Bug
|
||||
description: Used to report high severity bugs in llama.cpp (e.g. Malfunctioning features hindering important common workflow)
|
||||
title: "Bug: "
|
||||
labels: ["bug-unconfirmed", "high severity"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
Please include information about your system, the steps to reproduce the bug,
|
||||
and the version of llama.cpp that you are using.
|
||||
If possible, please provide a minimal code example that reproduces the bug.
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What happened?
|
||||
description: Also tell us, what did you expect to happen?
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: What operating system are you seeing the problem on?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
50
.github/ISSUE_TEMPLATE/04-bug-critical.yml
vendored
50
.github/ISSUE_TEMPLATE/04-bug-critical.yml
vendored
|
@ -1,50 +0,0 @@
|
|||
name: Critical Severity Bug
|
||||
description: Used to report critical severity bugs in llama.cpp (e.g. Crashing, Corrupted, Dataloss)
|
||||
title: "Bug: "
|
||||
labels: ["bug-unconfirmed", "critical severity"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this bug report!
|
||||
Please include information about your system, the steps to reproduce the bug,
|
||||
and the version of llama.cpp that you are using.
|
||||
If possible, please provide a minimal code example that reproduces the bug.
|
||||
- type: textarea
|
||||
id: what-happened
|
||||
attributes:
|
||||
label: What happened?
|
||||
description: Also tell us, what did you expect to happen?
|
||||
placeholder: Tell us what you see!
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: version
|
||||
attributes:
|
||||
label: Name and Version
|
||||
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
|
||||
placeholder: |
|
||||
$./llama-cli --version
|
||||
version: 2999 (42b4109e)
|
||||
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: operating-system
|
||||
attributes:
|
||||
label: What operating system are you seeing the problem on?
|
||||
multiple: true
|
||||
options:
|
||||
- Linux
|
||||
- Mac
|
||||
- Windows
|
||||
- BSD
|
||||
- Other? (Please let us know in description)
|
||||
validations:
|
||||
required: false
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
51
.github/ISSUE_TEMPLATE/05-enhancement.yml
vendored
51
.github/ISSUE_TEMPLATE/05-enhancement.yml
vendored
|
@ -1,51 +0,0 @@
|
|||
name: Enhancement
|
||||
description: Used to request enhancements for llama.cpp
|
||||
title: "Feature Request: "
|
||||
labels: ["enhancement"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
[Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas)
|
||||
|
||||
- type: checkboxes
|
||||
id: prerequisites
|
||||
attributes:
|
||||
label: Prerequisites
|
||||
description: Please confirm the following before submitting your enhancement request.
|
||||
options:
|
||||
- label: I am running the latest code. Mention the version if possible as well.
|
||||
required: true
|
||||
- label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
|
||||
required: true
|
||||
- label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
|
||||
required: true
|
||||
- label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: feature-description
|
||||
attributes:
|
||||
label: Feature Description
|
||||
description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
|
||||
placeholder: Detailed description of the enhancement
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: motivation
|
||||
attributes:
|
||||
label: Motivation
|
||||
description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
|
||||
placeholder: Explanation of why this feature is needed and its benefits
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: possible-implementation
|
||||
attributes:
|
||||
label: Possible Implementation
|
||||
description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
|
||||
placeholder: Detailed description of potential implementation
|
||||
validations:
|
||||
required: false
|
52
.github/ISSUE_TEMPLATE/06-research.yml
vendored
52
.github/ISSUE_TEMPLATE/06-research.yml
vendored
|
@ -1,52 +0,0 @@
|
|||
name: Research
|
||||
description: Track new technical research area
|
||||
title: "Research: "
|
||||
labels: ["research 🔬"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
|
||||
|
||||
- type: checkboxes
|
||||
id: research-stage
|
||||
attributes:
|
||||
label: Research Stage
|
||||
description: Track general state of this research ticket
|
||||
options:
|
||||
- label: Background Research (Let's try to avoid reinventing the wheel)
|
||||
- label: Hypothesis Formed (How do you think this will work and its effect?)
|
||||
- label: Strategy / Implementation Forming
|
||||
- label: Analysis of results
|
||||
- label: Debrief / Documentation (So people in the future can learn from us)
|
||||
|
||||
- type: textarea
|
||||
id: background
|
||||
attributes:
|
||||
label: Previous existing literature and research
|
||||
description: What's the current state of the art and what's the motivation for this research?
|
||||
|
||||
- type: textarea
|
||||
id: hypothesis
|
||||
attributes:
|
||||
label: Hypothesis
|
||||
description: How do you think this will work and its effect?
|
||||
|
||||
- type: textarea
|
||||
id: implementation
|
||||
attributes:
|
||||
label: Implementation
|
||||
description: Got an approach? e.g. a PR ready to go?
|
||||
|
||||
- type: textarea
|
||||
id: analysis
|
||||
attributes:
|
||||
label: Analysis
|
||||
description: How does the proposed implementation behave?
|
||||
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Relevant log output
|
||||
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
||||
render: shell
|
28
.github/ISSUE_TEMPLATE/07-refactor.yml
vendored
28
.github/ISSUE_TEMPLATE/07-refactor.yml
vendored
|
@ -1,28 +0,0 @@
|
|||
name: Refactor (Maintainers)
|
||||
description: Used to track refactoring opportunities
|
||||
title: "Refactor: "
|
||||
labels: ["refactor"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
|
||||
You may also want to check the [pull requests with the refactor label](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
|
||||
|
||||
- type: textarea
|
||||
id: background-description
|
||||
attributes:
|
||||
label: Background Description
|
||||
description: Please provide a detailed written description of the pain points you are trying to solve.
|
||||
placeholder: Detailed description behind your motivation to request refactor
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: possible-approaches
|
||||
attributes:
|
||||
label: Possible Refactor Approaches
|
||||
description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list.
|
||||
placeholder: Your idea of possible refactoring opportunity/approaches
|
||||
validations:
|
||||
required: false
|
11
.github/ISSUE_TEMPLATE/config.yml
vendored
11
.github/ISSUE_TEMPLATE/config.yml
vendored
|
@ -1,11 +0,0 @@
|
|||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: Got an idea?
|
||||
url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
|
||||
about: Pop it there. It may then become an enhancement ticket.
|
||||
- name: Got a question?
|
||||
url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
|
||||
about: Ask a question there!
|
||||
- name: Want to contribute?
|
||||
url: https://github.com/ggerganov/llama.cpp/wiki/contribute
|
||||
about: Head to the contribution guide page of the wiki for areas you can help with
|
91
.github/labeler.yml
vendored
91
.github/labeler.yml
vendored
|
@ -1,91 +0,0 @@
|
|||
# https://github.com/actions/labeler
|
||||
Kompute:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/include/ggml-kompute.h
|
||||
- ggml/src/ggml-kompute.cpp
|
||||
- README-kompute.md
|
||||
Apple Metal:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/include/ggml-metal.h
|
||||
- ggml/src/ggml-metal.cpp
|
||||
- README-metal.md
|
||||
SYCL:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/include/ggml-sycl.h
|
||||
- ggml/src/ggml-sycl.cpp
|
||||
- ggml/src/ggml-sycl/**
|
||||
- docs/backend/SYCL.md
|
||||
- examples/sycl/**
|
||||
Nvidia GPU:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/include/ggml-cuda.h
|
||||
- ggml/src/ggml-cuda/**
|
||||
Vulkan:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/ggml_vk_generate_shaders.py
|
||||
- ggml/src/ggml-vulkan*
|
||||
documentation:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- docs/**
|
||||
- media/**
|
||||
testing:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- tests/**
|
||||
build:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- cmake/**
|
||||
- CMakeLists.txt
|
||||
- CMakePresets.json
|
||||
examples:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: examples/**
|
||||
devops:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- .devops/**
|
||||
- .github/**
|
||||
- ci/**
|
||||
python:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "**/*.py"
|
||||
- requirements/**
|
||||
- gguf-py/**
|
||||
- .flake8
|
||||
script:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- scripts/**
|
||||
android:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- examples/llama.android/**
|
||||
server:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- examples/server/**
|
||||
ggml:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- ggml/include/ggml*.h
|
||||
- ggml/src/ggml*.c
|
||||
- ggml/src/ggml*.cpp
|
||||
- ggml/src/ggml*.h
|
||||
- ggml-cuda/**
|
||||
nix:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file:
|
||||
- "**/*.nix"
|
||||
- .github/workflows/nix-*.yml
|
||||
- .devops/nix/nixpkgs-instances.nix
|
||||
embedding:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: examples/embedding/
|
7
.github/pull_request_template.md
vendored
7
.github/pull_request_template.md
vendored
|
@ -1,7 +0,0 @@
|
|||
|
||||
|
||||
- [x] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
|
||||
- Self-reported review complexity:
|
||||
- [ ] Low
|
||||
- [ ] Medium
|
||||
- [ ] High
|
315
.github/workflows/bench.yml.disabled
vendored
315
.github/workflows/bench.yml.disabled
vendored
|
@ -1,315 +0,0 @@
|
|||
# TODO: there have been some issues with the workflow, so disabling for now
|
||||
# https://github.com/ggerganov/llama.cpp/issues/7893
|
||||
#
|
||||
# Benchmark
|
||||
name: Benchmark
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
gpu-series:
|
||||
description: 'Azure GPU series to run with'
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- Standard_NC4as_T4_v3
|
||||
- Standard_NC24ads_A100_v4
|
||||
- Standard_NC80adis_H100_v5
|
||||
sha:
|
||||
description: 'Commit SHA1 to build'
|
||||
required: false
|
||||
type: string
|
||||
duration:
|
||||
description: 'Duration of the bench'
|
||||
type: string
|
||||
default: 10m
|
||||
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
||||
schedule:
|
||||
- cron: '04 2 * * *'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
bench-server-baseline:
|
||||
runs-on: Standard_NC4as_T4_v3
|
||||
env:
|
||||
RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: found no way to avoid duplicating the runs-on value here
|
||||
N_USERS: 8
|
||||
DURATION: 10m
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
model: [phi-2]
|
||||
ftype: [q4_0, q8_0, f16]
|
||||
include:
|
||||
- model: phi-2
|
||||
ftype: q4_0
|
||||
pr_comment_enabled: "true"
|
||||
|
||||
if: |
|
||||
inputs.gpu-series == 'Standard_NC4as_T4_v3'
|
||||
|| (
|
||||
github.event_name == 'schedule'
|
||||
&& github.ref_name == 'master'
|
||||
&& github.repository_owner == 'ggerganov'
|
||||
)
|
||||
|| github.event_name == 'pull_request_target'
|
||||
|| (
|
||||
github.event_name == 'push'
|
||||
&& github.event.ref == 'refs/heads/master'
|
||||
&& github.repository_owner == 'ggerganov'
|
||||
)
|
||||
steps:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
|
||||
|
||||
- name: Install python env
|
||||
id: pipenv
|
||||
run: |
|
||||
cd examples/server/bench
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Prometheus
|
||||
id: install_prometheus
|
||||
run: |
|
||||
wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
|
||||
tar xzf prometheus*.tar.gz --strip-components=1
|
||||
./prometheus --config.file=examples/server/bench/prometheus.yml &
|
||||
while ! nc -z localhost 9090; do
|
||||
sleep 0.1
|
||||
done
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21'
|
||||
|
||||
- name: Install k6 and xk6-sse
|
||||
id: k6_installation
|
||||
run: |
|
||||
cd examples/server/bench
|
||||
go install go.k6.io/xk6/cmd/xk6@latest
|
||||
xk6 build master \
|
||||
--with github.com/phymbert/xk6-sse
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
run: |
|
||||
set -eux
|
||||
cmake -B build \
|
||||
-DGGML_NATIVE=OFF \
|
||||
-DLLAMA_BUILD_SERVER=ON \
|
||||
-DLLAMA_CURL=ON \
|
||||
-DLLAMA_CUBLAS=ON \
|
||||
-DCUDAToolkit_ROOT=/usr/local/cuda \
|
||||
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
|
||||
-DCMAKE_CUDA_ARCHITECTURES=75 \
|
||||
-DLLAMA_FATAL_WARNINGS=OFF \
|
||||
-DLLAMA_ALL_WARNINGS=OFF \
|
||||
-DCMAKE_BUILD_TYPE=Release;
|
||||
cmake --build build --config Release -j $(nproc) --target llama-server
|
||||
|
||||
- name: Download the dataset
|
||||
id: download_dataset
|
||||
run: |
|
||||
cd examples/server/bench
|
||||
wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
|
||||
|
||||
- name: Server bench
|
||||
id: server_bench
|
||||
env:
|
||||
HEAD_REF: ${{ github.head_ref || github.ref_name }}
|
||||
run: |
|
||||
set -eux
|
||||
|
||||
cd examples/server/bench
|
||||
source venv/bin/activate
|
||||
python bench.py \
|
||||
--runner-label ${{ env.RUNNER_LABEL }} \
|
||||
--name ${{ github.job }} \
|
||||
--branch $HEAD_REF \
|
||||
--commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
|
||||
--scenario script.js \
|
||||
--duration ${{ github.event.inputs.duration || env.DURATION }} \
|
||||
--hf-repo ggml-org/models \
|
||||
--hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
|
||||
--model-path-prefix /models \
|
||||
--parallel ${{ env.N_USERS }} \
|
||||
-ngl 33 \
|
||||
--batch-size 2048 \
|
||||
--ubatch-size 256 \
|
||||
--ctx-size 16384 \
|
||||
--n-prompts 1000 \
|
||||
--max-prompt-tokens 1024 \
|
||||
--max-tokens 2048
|
||||
|
||||
cat results.github.env >> $GITHUB_ENV
|
||||
|
||||
# Remove dataset as we do not want it in the artefact
|
||||
rm ShareGPT_V3_unfiltered_cleaned_split.json
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
|
||||
compression-level: 9
|
||||
path: |
|
||||
examples/server/bench/*.jpg
|
||||
examples/server/bench/*.json
|
||||
examples/server/bench/*.log
|
||||
|
||||
- name: Commit status
|
||||
uses: Sibz/github-status-action@v1
|
||||
with:
|
||||
authToken: ${{secrets.GITHUB_TOKEN}}
|
||||
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
|
||||
context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
|
||||
description: |
|
||||
${{ env.BENCH_RESULTS }}
|
||||
state: 'success'
|
||||
|
||||
- name: Upload benchmark images
|
||||
uses: devicons/public-upload-to-imgur@v2.2.2
|
||||
continue-on-error: true # Important as it looks unstable: 503
|
||||
id: imgur_step
|
||||
with:
|
||||
client_id: ${{secrets.IMGUR_CLIENT_ID}}
|
||||
path: |
|
||||
examples/server/bench/prompt_tokens_seconds.jpg
|
||||
examples/server/bench/predicted_tokens_seconds.jpg
|
||||
examples/server/bench/kv_cache_usage_ratio.jpg
|
||||
examples/server/bench/requests_processing.jpg
|
||||
|
||||
- name: Extract mermaid
|
||||
id: set_mermaid
|
||||
run: |
|
||||
set -eux
|
||||
|
||||
cd examples/server/bench
|
||||
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
|
||||
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
|
||||
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
||||
PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
|
||||
echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
|
||||
echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
||||
KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
|
||||
echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
|
||||
echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
||||
REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
|
||||
echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
|
||||
echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
||||
- name: Extract image url
|
||||
id: extract_image_url
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set -eux
|
||||
|
||||
echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
|
||||
echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
|
||||
echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
|
||||
echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
|
||||
|
||||
- name: Comment PR
|
||||
uses: mshick/add-pr-comment@v2
|
||||
id: comment_pr
|
||||
if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
|
||||
with:
|
||||
message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
|
||||
message: |
|
||||
<p align="center">
|
||||
|
||||
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
|
||||
|
||||
</p>
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Expand details for performance related PR only</summary>
|
||||
|
||||
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
|
||||
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
|
||||
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
|
||||
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
|
||||
- ${{ env.BENCH_GRAPH_XLABEL }}
|
||||
|
||||
|
||||
<p align="center">
|
||||
|
||||
<img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
|
||||
|
||||
<details>
|
||||
|
||||
<summary>More</summary>
|
||||
|
||||
```mermaid
|
||||
${{ env.PROMPT_TOKENS_SECONDS }}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
|
||||
|
||||
<details>
|
||||
<summary>More</summary>
|
||||
|
||||
```mermaid
|
||||
${{ env.PREDICTED_TOKENS_SECONDS }}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</p>
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Details</summary>
|
||||
|
||||
<p align="center">
|
||||
|
||||
<img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
|
||||
|
||||
<details>
|
||||
<summary>More</summary>
|
||||
|
||||
```mermaid
|
||||
${{ env.KV_CACHE_USAGE_RATIO }}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
|
||||
|
||||
<details>
|
||||
<summary>More</summary>
|
||||
|
||||
```mermaid
|
||||
${{ env.REQUESTS_PROCESSING }}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
</p>
|
||||
</details>
|
||||
</details>
|
1389
.github/workflows/build.yml
vendored
1389
.github/workflows/build.yml
vendored
File diff suppressed because it is too large
Load diff
28
.github/workflows/close-issue.yml
vendored
28
.github/workflows/close-issue.yml
vendored
|
@ -1,28 +0,0 @@
|
|||
name: Close inactive issues
|
||||
on:
|
||||
schedule:
|
||||
- cron: "42 0 * * *"
|
||||
|
||||
# Fine-grained permissions
|
||||
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
close-issues:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/stale@v5
|
||||
with:
|
||||
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
|
||||
days-before-issue-stale: 30
|
||||
days-before-issue-close: 14
|
||||
stale-issue-label: "stale"
|
||||
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
|
||||
days-before-pr-stale: -1
|
||||
days-before-pr-close: -1
|
||||
operations-per-run: 10000
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
125
.github/workflows/docker.yml
vendored
125
.github/workflows/docker.yml
vendored
|
@ -1,125 +0,0 @@
|
|||
# This workflow uses actions that are not certified by GitHub.
|
||||
# They are provided by a third-party and are governed by
|
||||
# separate terms of service, privacy policy, and support
|
||||
# documentation.
|
||||
|
||||
# GitHub recommends pinning actions to a commit SHA.
|
||||
# To get a newer version, you will need to update the SHA.
|
||||
# You can also reference a tag or branch, but the action may change without warning.
|
||||
|
||||
name: Publish Docker image
|
||||
|
||||
on:
|
||||
#pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
|
||||
workflow_dispatch: # allows manual triggering, useful for debugging
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
# Fine-grained permissions
|
||||
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
||||
permissions:
|
||||
packages: write
|
||||
|
||||
jobs:
|
||||
push_to_registry:
|
||||
name: Push Docker image to Docker Hub
|
||||
#if: github.event.pull_request.draft == false
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
- { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
- { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
- { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
|
||||
- { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
|
||||
- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
|
||||
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
|
||||
#- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
#- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
#- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||
- { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
|
||||
- { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
|
||||
steps:
|
||||
- name: Check out the repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # preserve git history, so we can determine the build number
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Determine tag name
|
||||
id: tag
|
||||
shell: bash
|
||||
run: |
|
||||
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
||||
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
||||
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
|
||||
REPO_NAME="${{ github.event.repository.name }}"
|
||||
|
||||
# determine tag name postfix (build number, commit hash)
|
||||
if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
|
||||
TAG_POSTFIX="b${BUILD_NUMBER}"
|
||||
else
|
||||
SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
|
||||
TAG_POSTFIX="${SAFE_NAME}-${SHORT_HASH}"
|
||||
fi
|
||||
|
||||
# list all tags possible
|
||||
TAGS=""
|
||||
TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }},"
|
||||
TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}"
|
||||
|
||||
echo "output_tags=$TAGS" >> $GITHUB_OUTPUT
|
||||
echo "output_tags=$TAGS" # print out for debugging
|
||||
env:
|
||||
GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
|
||||
|
||||
# https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
with:
|
||||
# this might remove tools that are actually needed,
|
||||
# if set to "true" but frees about 6 GB
|
||||
tool-cache: false
|
||||
|
||||
# all of these default to true, but feel free to set to
|
||||
# "false" if necessary for your workflow
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: true
|
||||
docker-images: true
|
||||
swap-storage: true
|
||||
|
||||
- name: Build and push Docker image (tagged + versioned)
|
||||
if: github.event_name == 'push'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
platforms: ${{ matrix.config.platforms }}
|
||||
# tag list is generated from step above
|
||||
tags: ${{ steps.tag.outputs.output_tags }}
|
||||
file: ${{ matrix.config.dockerfile }}
|
27
.github/workflows/editorconfig.yml
vendored
27
.github/workflows/editorconfig.yml
vendored
|
@ -1,27 +0,0 @@
|
|||
# Checks the repository against its .editorconfig rules.
name: EditorConfig Checker

on:
  # allows manual triggering
  workflow_dispatch:
    inputs:
      # NOTE(review): no step in this workflow reads this input - confirm it is still needed.
      create_release:
        description: "Create new release"
        required: true
        type: boolean
  push:
    branches: [master]
  pull_request:
    branches: [master]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  editorconfig:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Installs the checker binary that the following step invokes.
      - uses: editorconfig-checker/action-editorconfig-checker@main
      - run: editorconfig-checker
|
44
.github/workflows/gguf-publish.yml
vendored
44
.github/workflows/gguf-publish.yml
vendored
|
@ -1,44 +0,0 @@
|
|||
# This workflow will upload a Python Package using Twine when a GGUF release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

# See `gguf-py/README.md` for how to make a release.

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  workflow_dispatch:
  push:
    # Pattern matched against refs/tags
    tags: ["gguf-v*"] # Push events to every version tag

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.9.x"

      # Poetry is installed and run from inside the gguf-py package directory.
      - name: Install dependencies
        working-directory: gguf-py
        run: |
          python -m pip install poetry
          poetry install

      - name: Build package
        working-directory: gguf-py
        run: poetry build

      # Uploads the artifacts produced by the build step above.
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
          packages-dir: gguf-py/dist
|
17
.github/workflows/labeler.yml
vendored
17
.github/workflows/labeler.yml
vendored
|
@ -1,17 +0,0 @@
|
|||
# Applies labels to pull requests according to .github/labeler.yml.
name: Pull Request Labeler

on: [pull_request_target]

jobs:
  labeler:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      # The repository is named explicitly, so this checkout does not depend on
      # which repository or fork triggered the event.
      - uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
      - uses: actions/labeler@v5
        with:
          configuration-path: .github/labeler.yml
|
72
.github/workflows/nix-ci-aarch64.yml
vendored
72
.github/workflows/nix-ci-aarch64.yml
vendored
|
@ -1,72 +0,0 @@
|
|||
# Evaluates and builds the flake's aarch64-linux outputs on an x86_64 runner
# using QEMU user-mode emulation.
name: Nix aarch64 builds

on:
  workflow_dispatch: # allows manual triggering
  schedule:
    # Rebuild daily rather than on every push because QEMU is expensive (e.g.
    # 1.5h instead of minutes with the cold cache).
    #
    # randint(0, 59), randint(0, 23)
    - cron: '26 12 * * *'
  # But also rebuild if we touched any of the Nix expressions:
  push:
    branches:
      - master
    paths: ['**/*.nix', 'flake.lock']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['**/*.nix', 'flake.lock']

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

# Fine-grained permissions
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
  id-token: write
  contents: read

jobs:
  nix-build-aarch64:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install QEMU
        # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
        run: |
          sudo apt-get update
          sudo apt-get install -y qemu-user-static qemu-system-aarch64
          sudo usermod -a -G kvm $USER
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@v9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          extra-conf: |
            extra-platforms = aarch64-linux
            extra-system-features = nixos-test kvm
            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@v2
        with:
          # This job has no matrix, so the former `matrix.cachixName` expression
          # expanded to an empty string ("https://.cachix.org"). Use the same
          # cache that the cachix step below pushes to.
          upstream-cache: https://llama-cpp.cachix.org
      - name: Set-up cachix to push the results to
        uses: cachix/cachix-action@v13
        with:
          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
          name: llama-cpp
      - name: Show all output paths
        run: >
          nix run github:nix-community/nix-eval-jobs
          -- --gc-roots-dir gcroot
          --flake
          ".#packages.aarch64-linux"
      - name: Build
        run: >
          nix run github:Mic92/nix-fast-build
          -- --skip-cached --no-nom
          --systems aarch64-linux
          --flake
          ".#checks.aarch64-linux"
|
79
.github/workflows/nix-ci.yml
vendored
79
.github/workflows/nix-ci.yml
vendored
|
@ -1,79 +0,0 @@
|
|||
# Evaluates (nix-eval) and builds (nix-build) the flake outputs for the
# runner's native system on Linux and macOS.
name: Nix CI

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
  pull_request:
    types: [opened, synchronize, reopened]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

# Fine-grained permissions
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
  id-token: write
  contents: read

jobs:
  nix-eval:
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest ]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@v9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          extra-conf: |
            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@v2
        with:
          # The matrix only defines `os`, so the former `matrix.cachixName`
          # expression expanded to an empty string ("https://.cachix.org").
          # Point at the llama-cpp cache listed in extra-substituters above.
          upstream-cache: https://llama-cpp.cachix.org
      - name: List all flake outputs
        run: nix flake show --all-systems
      - name: Show all output paths
        run: >
          nix run github:nix-community/nix-eval-jobs
          -- --gc-roots-dir gcroot
          --flake
          ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
  nix-build:
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest ]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@v9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          extra-conf: |
            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@v2
        with:
          # See the note in the nix-eval job: `matrix.cachixName` is undefined
          # here, so name the cache that the cachix step below pushes to.
          upstream-cache: https://llama-cpp.cachix.org
      - name: Set-up cachix to push the results to
        uses: cachix/cachix-action@v13
        with:
          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
          name: llama-cpp
      - name: Build
        run: >
          nix run github:Mic92/nix-fast-build
          -- --skip-cached --no-nom
          --flake
          ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
|
22
.github/workflows/nix-flake-update.yml
vendored
22
.github/workflows/nix-flake-update.yml
vendored
|
@ -1,22 +0,0 @@
|
|||
# Opens a pull request that refreshes flake.lock, weekly or on demand.
name: update-flake-lock

on:
  workflow_dispatch:
  schedule:
    # runs weekly on Sunday at 00:00
    - cron: "0 0 * * 0"

jobs:
  lockfile:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@main

      - name: Update flake.lock
        uses: DeterminateSystems/update-flake-lock@main
        with:
          pr-title: "nix: update flake.lock"
          # One label per line.
          pr-labels: |
            nix
          pr-reviewers: philiptaron,SomeoneSerge
          token: ${{ secrets.FLAKE_TOKEN }}
|
36
.github/workflows/nix-publish-flake.yml
vendored
36
.github/workflows/nix-publish-flake.yml
vendored
|
@ -1,36 +0,0 @@
|
|||
# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
name: Publish a flake to flakestry & flakehub

on:
  push:
    tags:
      - "*"
  workflow_dispatch:
    inputs:
      tag:
        description: The existing tag to publish
        type: string
        required: true

jobs:
  flakestry-publish:
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: flakestry/flakestry-publish@main
        with:
          # Manual runs supply the tag as an input; tag pushes fall back to the pushed ref name.
          version: "${{ inputs.tag || github.ref_name }}"

  flakehub-publish:
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: actions/checkout@v4
        with:
          # Yields refs/tags/<tag> when a tag input is present, otherwise an empty ref.
          ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
      - uses: DeterminateSystems/nix-installer-action@main
      - uses: DeterminateSystems/flakehub-push@main
        with:
          visibility: public
          tag: "${{ inputs.tag }}"
|
33
.github/workflows/python-check-requirements.yml
vendored
33
.github/workflows/python-check-requirements.yml
vendored
|
@ -1,33 +0,0 @@
|
|||
# Runs scripts/check-requirements.sh whenever the conversion scripts, the
# requirements files, the check script or this workflow change.
name: Python check requirements.txt

on:
  push:
    paths: [".github/workflows/python-check-requirements.yml", "scripts/check-requirements.sh", "convert*.py", "**/requirements*.txt"]
  pull_request:
    paths: [".github/workflows/python-check-requirements.yml", "scripts/check-requirements.sh", "convert*.py", "**/requirements*.txt"]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  python-check-requirements:
    name: check-requirements
    runs-on: ubuntu-latest
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4

      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string.
          python-version: '3.11'

      - name: Run check-requirements.sh script
        run: bash scripts/check-requirements.sh
|
23
.github/workflows/python-lint.yml
vendored
23
.github/workflows/python-lint.yml
vendored
|
@ -1,23 +0,0 @@
|
|||
# Lints the Python sources with flake8 on every push and pull request.
name: flake8 Lint

on:
  - push
  - pull_request

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  flake8-lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4

      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: flake8 Lint
        uses: py-actions/flake8@v2
        with:
          # Extra flake8 plugin installed alongside flake8 itself.
          plugins: flake8-no-print
|
40
.github/workflows/python-type-check.yml
vendored
40
.github/workflows/python-type-check.yml
vendored
|
@ -1,40 +0,0 @@
|
|||
# Type-checks the Python sources with Pyright whenever Python files, the
# requirements files, the Pyright config or this workflow change.
name: Python Type-Check

on:
  push:
    paths: [".github/workflows/python-type-check.yml", "pyrightconfig.json", "**.py", "**/requirements*.txt"]
  pull_request:
    paths: [".github/workflows/python-type-check.yml", "pyrightconfig.json", "**.py", "**/requirements*.txt"]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  python-type-check:
    name: pyright type-check
    runs-on: ubuntu-latest
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4

      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install Python dependencies
        # TODO: use a venv
        run: pip install -r requirements/requirements-all.txt

      - name: Type-check with Pyright
        uses: jakebailey/pyright-action@v2
        with:
          # Pyright release to run; quoted so it is unmistakably a string.
          version: '1.1.382'
          level: warning
          warnings: true
|
190
.github/workflows/server.yml
vendored
190
.github/workflows/server.yml
vendored
|
@ -1,190 +0,0 @@
|
|||
# Server build and tests
#
# Builds llama-server on Linux (with sanitizers) and on Windows, then runs the
# integration tests from examples/server/tests. The slow test suite only runs
# when requested through the `slow_tests` input of a manual run.
name: Server

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean
  push:
    branches:
      - master
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']

env:
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1
  LLAMA_LOG_VERBOSITY: 10

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  server:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
        build_type: [RelWithDebInfo]
        include:
          # Extra combination: an optimized build without any sanitizer.
          - build_type: Release
            sanitizer: ""
      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

    steps:
      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential \
            xxd \
            git \
            cmake \
            curl \
            wget \
            language-pack-en \
            libcurl4-openssl-dev

      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          # First non-empty value wins: manual SHA input, PR head, then the triggering commit/ref.
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      # Re-runs deps.sh and fails if that leaves untracked or modified files behind.
      - name: Verify server deps
        id: verify_server_deps
        run: |
          git config --global --add safe.directory $(realpath .)
          cd examples/server
          git ls-files --others --modified
          git status
          ./deps.sh
          git status
          not_ignored_files="$(git ls-files --others --modified)"
          echo "Modified files: ${not_ignored_files}"
          if [ -n "${not_ignored_files}" ]; then
            echo "Repository is dirty or server deps are not built as expected"
            echo "${not_ignored_files}"
            exit 1
          fi

      - name: Build (no OpenMP)
        id: cmake_build_no_openmp
        if: ${{ matrix.sanitizer == 'THREAD' }}
        run: |
          cmake -B build \
              -DGGML_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
              -DGGML_OPENMP=OFF ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Build
        id: cmake_build
        if: ${{ matrix.sanitizer != 'THREAD' }}
        run: |
          cmake -B build \
              -DGGML_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Tests
        id: server_integration_tests
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh

      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow


  server-windows:
    # NOTE(review): confirm the windows-2019 runner image is still offered by GitHub.
    runs-on: windows-2019

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: libCURL
        id: get_libcurl
        env:
          CURL_VERSION: 8.6.0_6
        run: |
          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
          mkdir $env:RUNNER_TEMP/libcurl
          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl

      - name: Build
        id: cmake_build
        run: |
          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      - name: Copy Libcurl
        id: prepare_libcurl
        run: |
          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll

      # This job defines no matrix, so the former `matrix.disabled_on_pr`
      # condition was always true; the step simply always runs.
      - name: Tests
        id: server_integration_tests
        run: |
          cd examples/server/tests
          $env:PYTHONIOENCODING = ":replace"
          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp

      # The former `matrix.build_type == 'Release'` clause could never hold
      # without a matrix, which disabled this step permanently. The build step
      # above always produces a Release build, so only the trigger is checked.
      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ github.event.schedule || github.event.inputs.slow_tests == 'true' }}
        run: |
          cd examples/server/tests
          behave.exe --stop --no-skipped --no-capture --tags slow
|
Loading…
Add table
Reference in a new issue