Merge branch 'vulkan_test' into concedo_experimental

# Conflicts:
#	CMakeLists.txt
#	Makefile
#	llama.cpp
commit 2a4a7241e6
Author: Concedo
Date:   2024-01-25 23:01:44 +08:00
117 changed files with 394082 additions and 68 deletions

--- a/llama.cpp
+++ b/llama.cpp

@@ -1,6 +1,8 @@
 #define LLAMA_API_INTERNAL
 #include "llama.h"
+#include <iostream>
 #include "unicode.h"
 #include "ggml.h"
@@ -13,6 +15,9 @@
 #if defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #endif
+#if defined(GGML_USE_VULKAN)
+# include "ggml-vulkan.h"
+#endif
 #ifdef GGML_USE_METAL
 # include "ggml-metal.h"
@@ -1263,6 +1268,10 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer
     }
 #elif defined(GGML_USE_CPU_HBM)
     buft = ggml_backend_cpu_hbm_buffer_type();
+#elif defined(GGML_USE_VULKAN)
+    if (host_buffer) {
+        buft = ggml_backend_vk_host_buffer_type();
+    }
 #endif
 
     if (buft == nullptr) {
@@ -1280,6 +1289,8 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) {
     buft = ggml_backend_metal_buffer_type();
 #elif defined(GGML_USE_CUBLAS)
     buft = ggml_backend_cuda_buffer_type(gpu);
+#elif defined(GGML_USE_VULKAN)
+    buft = ggml_backend_vk_buffer_type();
 #elif defined(GGML_USE_CLBLAST)
     buft = ggml_backend_opencl_buffer_type();
 #endif
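For context (not part of the diff): the two buffer-type hooks above separate pinned host memory, used for data that is staged to or from the GPU, from device-local memory used for offloaded tensors. A minimal standalone sketch of the same fall-through logic, assuming only the ggml-backend and ggml-vulkan headers of this revision; the helper names are hypothetical:

#include "ggml-backend.h"
#if defined(GGML_USE_VULKAN)
#include "ggml-vulkan.h"
#endif

// Host-side tensors: prefer the Vulkan pinned-host buffer type so host<->GPU
// copies can skip an extra staging copy; otherwise plain CPU memory.
static ggml_backend_buffer_type_t pick_cpu_buft(bool host_buffer) {
    ggml_backend_buffer_type_t buft = nullptr;
#if defined(GGML_USE_VULKAN)
    if (host_buffer) {
        buft = ggml_backend_vk_host_buffer_type();
    }
#endif
    return buft ? buft : ggml_backend_cpu_buffer_type();
}

// Offloaded tensors: device-local memory when a GPU backend is compiled in,
// host memory otherwise (mirrors the nullptr fallback in llama.cpp).
static ggml_backend_buffer_type_t pick_offload_buft() {
    ggml_backend_buffer_type_t buft = nullptr;
#if defined(GGML_USE_VULKAN)
    buft = ggml_backend_vk_buffer_type();
#endif
    return buft ? buft : ggml_backend_cpu_buffer_type();
}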
@@ -6685,7 +6696,7 @@ static int llama_decode_internal(
     }
 
     const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
-    if (ggml_cpu_has_cublas() && fully_offloaded) {
+    if ((ggml_cpu_has_cublas() || ggml_cpu_has_vulkan()) && fully_offloaded) {
         n_threads = 1;
     }
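ggml_cpu_has_vulkan(), like ggml_cpu_has_cublas(), is a compile-time feature query from ggml.h: it returns 1 only when the library was built with GGML_USE_VULKAN. With every layer offloaded, extra CPU threads would mostly spin waiting on the GPU, so decoding drops to a single thread for Vulkan builds as well. A small illustrative check, assuming only ggml.h:

#include <cstdio>
#include "ggml.h"

int main(void) {
    // Report which GPU paths this ggml build was compiled with.
    printf("cuBLAS: %d  Vulkan: %d  CLBlast: %d\n",
           ggml_cpu_has_cublas(), ggml_cpu_has_vulkan(), ggml_cpu_has_clblast());
    return 0;
}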
@@ -10215,6 +10226,16 @@ struct llama_context * llama_new_context_with_model(
                 }
             }
         }
+#elif defined(GGML_USE_VULKAN)
+        if (model->n_gpu_layers > 0) {
+            ggml_backend_t backend = ggml_backend_vk_init();
+            if (backend == nullptr) {
+                LLAMA_LOG_ERROR("%s: failed to initialize Vulkan backend\n", __func__);
+                llama_free(ctx);
+                return nullptr;
+            }
+            ctx->backends.push_back(backend);
+        }
 #endif
         ctx->backend_cpu = ggml_backend_cpu_init();
         if (ctx->backend_cpu == nullptr) {
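End-to-end, the new branch is reached whenever a Vulkan build creates a context with n_gpu_layers > 0. A hypothetical usage sketch (not part of this commit), assuming the llama.h API as of this revision (llama_backend_init(bool numa), llama_load_model_from_file, llama_new_context_with_model):

#include <cstdio>
#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
        return 1;
    }

    llama_backend_init(false);                       // numa = false

    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 99;                       // > 0, so ggml_backend_vk_init() is attempted

    llama_model * model = llama_load_model_from_file(argv[1], mparams);
    if (model == nullptr) {
        return 1;
    }

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (ctx == nullptr) {                            // e.g. the Vulkan backend failed to initialize
        llama_free_model(model);
        return 1;
    }

    // ... run inference ...

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}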