From 43e8995e754b8e04642e92822055d193a3272b37 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 7 Apr 2024 16:08:12 +0300
Subject: [PATCH 1/7] scripts : sync ggml-cuda folder

---
 scripts/sync-ggml-am.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh
index 3003290f6..3ae7f460d 100755
--- a/scripts/sync-ggml-am.sh
+++ b/scripts/sync-ggml-am.sh
@@ -60,6 +60,7 @@ while read c; do
         src/ggml*.m \
         src/ggml*.metal \
         src/ggml*.cu \
+        src/ggml-cuda/* \
         tests/test-opt.cpp \
         tests/test-grad0.cpp \
         tests/test-quantize-fns.cpp \

From f77261a7c525fa1fa47b18a3d78cd308ae41cafc Mon Sep 17 00:00:00 2001
From: Slava Primenko
Date: Thu, 4 Apr 2024 14:49:24 +0200
Subject: [PATCH 2/7] ggml: bypass code incompatible with CUDA < 11.1 (whisper/2020)

The `cudaHostRegisterReadOnly` parameter was only introduced in CUDA 11.1.

See this issue for more details:
https://github.com/ggerganov/whisper.cpp/issues/2007
---
 ggml-cuda.cu | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index f51b2042d..ce28cb55d 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -2617,6 +2617,7 @@ GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size
         return false;
     }

+#if CUDART_VERSION >= 11100
     cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterPortable | cudaHostRegisterReadOnly);
     if (err != cudaSuccess) {
         // clear the error
@@ -2627,6 +2628,9 @@ GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size
         return false;
     }
     return true;
+#else
+    return false;
+#endif
 }

 GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer) {

From c37247796b4d45bdbbc8259afffb80208ad8fe55 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 7 Apr 2024 17:05:51 +0300
Subject: [PATCH 3/7] sync : ggml

---
 scripts/sync-ggml.last | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last
index bbe7e3084..82195550e 100644
--- a/scripts/sync-ggml.last
+++ b/scripts/sync-ggml.last
@@ -1 +1 @@
-8e413034b42e4fbedc2873166f61193b75f2622a
+bb8d8cff851b2de6fde4904be492d39458837e1a

From e0717e751e12af13f4eedaae8bbbd608e40d7e54 Mon Sep 17 00:00:00 2001
From: DAN™
Date: Sun, 7 Apr 2024 13:33:59 -0400
Subject: [PATCH 4/7] Add GritLM as supported models. (#6513)

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index bd3f9cff5..20dd783dc 100644
--- a/README.md
+++ b/README.md
@@ -119,6 +119,7 @@ Typically finetunes of the base models below are supported as well.
 - [x] [Xverse](https://huggingface.co/models?search=xverse)
 - [x] [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
 - [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
+- [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)

 **Multimodal models:**

From b909236c0bf0b6e872af95df9490492ecec310ac Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 7 Apr 2024 21:25:30 +0300
Subject: [PATCH 5/7] flake.lock: Update (#6517)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Flake lock file updates:

• Updated input 'flake-parts':
    'github:hercules-ci/flake-parts/f7b3c975cf067e56e7cda6cb098ebe3fb4d74ca2' (2024-03-01)
  → 'github:hercules-ci/flake-parts/9126214d0a59633752a136528f5f3b9aa8565b7d' (2024-04-01)

• Updated input 'flake-parts/nixpkgs-lib':
    'github:NixOS/nixpkgs/1536926ef5621b09bba54035ae2bb6d806d72ac8?dir=lib' (2024-02-29)
  → 'github:NixOS/nixpkgs/d8fe5e6c92d0d190646fb9f1056741a229980089?dir=lib' (2024-03-29)

• Updated input 'nixpkgs':
    'github:NixOS/nixpkgs/d8fe5e6c92d0d190646fb9f1056741a229980089' (2024-03-29)
  → 'github:NixOS/nixpkgs/fd281bd6b7d3e32ddfa399853946f782553163b5' (2024-04-03)

Co-authored-by: github-actions[bot]
---
 flake.lock | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/flake.lock b/flake.lock
index c32725112..ed48dd8da 100644
--- a/flake.lock
+++ b/flake.lock
@@ -5,11 +5,11 @@
         "nixpkgs-lib": "nixpkgs-lib"
       },
       "locked": {
-        "lastModified": 1709336216,
-        "narHash": "sha256-Dt/wOWeW6Sqm11Yh+2+t0dfEWxoMxGBvv3JpIocFl9E=",
+        "lastModified": 1712014858,
+        "narHash": "sha256-sB4SWl2lX95bExY2gMFG5HIzvva5AVMJd4Igm+GpZNw=",
         "owner": "hercules-ci",
         "repo": "flake-parts",
-        "rev": "f7b3c975cf067e56e7cda6cb098ebe3fb4d74ca2",
+        "rev": "9126214d0a59633752a136528f5f3b9aa8565b7d",
         "type": "github"
       },
       "original": {
@@ -20,11 +20,11 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1711703276,
-        "narHash": "sha256-iMUFArF0WCatKK6RzfUJknjem0H9m4KgorO/p3Dopkk=",
+        "lastModified": 1712163089,
+        "narHash": "sha256-Um+8kTIrC19vD4/lUCN9/cU9kcOsD1O1m+axJqQPyMM=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "d8fe5e6c92d0d190646fb9f1056741a229980089",
+        "rev": "fd281bd6b7d3e32ddfa399853946f782553163b5",
         "type": "github"
       },
       "original": {
@@ -37,11 +37,11 @@
     "nixpkgs-lib": {
       "locked": {
         "dir": "lib",
-        "lastModified": 1709237383,
-        "narHash": "sha256-cy6ArO4k5qTx+l5o+0mL9f5fa86tYUX3ozE1S+Txlds=",
+        "lastModified": 1711703276,
+        "narHash": "sha256-iMUFArF0WCatKK6RzfUJknjem0H9m4KgorO/p3Dopkk=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "1536926ef5621b09bba54035ae2bb6d806d72ac8",
+        "rev": "d8fe5e6c92d0d190646fb9f1056741a229980089",
         "type": "github"
       },
       "original": {

From 855f54402e866ed19d8d675b56a81c844c64b325 Mon Sep 17 00:00:00 2001
From: Mark Fairbairn
Date: Sun, 7 Apr 2024 19:52:19 +0100
Subject: [PATCH 6/7] Change Windows AMD example to release build to make inference much faster. (#6525)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 20dd783dc..2fbbb4a0c 100644
--- a/README.md
+++ b/README.md
@@ -519,7 +519,7 @@ Building the program with BLAS support may lead to some performance improvements
     set PATH=%HIP_PATH%\bin;%PATH%
     mkdir build
     cd build
-    cmake -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ ..
+    cmake -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release ..
     cmake --build .
     ```
 Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100` that corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors)

From d752327c3338d5b9634121d651c0105f2c933f9b Mon Sep 17 00:00:00 2001
From: Firat
Date: Mon, 8 Apr 2024 00:48:29 -0700
Subject: [PATCH 7/7] Adding KodiBot to UI list (#6535)

KodiBot is a free and open-source AI chat app released under the GNU
General Public License.
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 2fbbb4a0c..6d9961120 100644
--- a/README.md
+++ b/README.md
@@ -183,6 +183,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [KanTV](https://github.com/zhouwg/kantv?tab=readme-ov-file)(Apachev2.0 or later)
 - [Dot](https://github.com/alexpinel/Dot) (GPL)
 - [MindMac](https://mindmac.app) (proprietary)
+- [KodiBot](https://github.com/firatkiral/kodibot) (GPL)

 *(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*