mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
fix cuda and macos compile issues
This commit is contained in:
parent
6463f5c26b
commit
a5580a32fb
6 changed files with 6 additions and 31 deletions
|
|
@ -79,6 +79,8 @@ file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-mma*.cu")
|
|||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
||||
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
|
||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
||||
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmf*.cu")
|
||||
list(APPEND GGML_SOURCES_CUDA ${SRCS})
|
||||
set(GGML_V3_CUDA_SOURCES otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h)
|
||||
set(GGML_V2_CUDA_SOURCES otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h)
|
||||
set(GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)
|
||||
|
|
@ -176,6 +178,8 @@ if (LLAMA_HIPBLAS)
|
|||
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
||||
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
|
||||
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
||||
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmf*.cu")
|
||||
list(APPEND GGML_SOURCES_ROCM ${SRCS})
|
||||
add_compile_definitions(GGML_USE_HIP GGML_USE_CUDA SD_USE_CUDA GGML_HIP_NO_VMM)
|
||||
add_library(ggml-rocm ${GGML_SOURCES_CUDA})
|
||||
|
||||
|
|
|
|||
1
Makefile
1
Makefile
|
|
@ -191,6 +191,7 @@ endif
|
|||
# It is recommended to build for cuBLAS with the CMake file if you can - it will likely work better.
|
||||
OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu))
|
||||
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
|
||||
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmf*.cu))
|
||||
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
|
||||
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
|
||||
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
|
||||
|
|
|
|||
|
|
@ -264,11 +264,6 @@ void dia_context::reset() {
|
|||
|
||||
struct dia_context * build_new_dia_context(struct dia_model * model, int n_threads, bool use_cpu) {
|
||||
dia_context * dctx = new dia_context(model, n_threads);
|
||||
if (!use_cpu) {
|
||||
#ifdef GGML_USE_METAL
|
||||
dctx->backend = ggml_backend_metal_init();
|
||||
#endif
|
||||
}
|
||||
dctx->backend_cpu = ggml_backend_cpu_init();
|
||||
dctx->set_threads();
|
||||
dctx->build_schedule();
|
||||
|
|
@ -280,9 +275,7 @@ static bool dia_kv_cache_init(struct dia_kv_cache * cache, dia_model * model, di
|
|||
ggml_backend_buffer_type_t buft = nullptr;
|
||||
// This only really supports CPU or Metal for the time being.
|
||||
if (dctx->backend != nullptr) {
|
||||
#ifdef GGML_USE_METAL
|
||||
buft = ggml_backend_metal_buffer_type();
|
||||
#endif
|
||||
|
||||
} else {
|
||||
buft = ggml_backend_cpu_buffer_type();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,11 +138,6 @@ struct ggml_tensor * build_attn_mask(ggml_context * ctx, orpheus_context * octx,
|
|||
|
||||
orpheus_context * build_new_orpheus_context(orpheus_model * model, int n_threads, bool use_cpu) {
|
||||
orpheus_context * octx = new orpheus_context(model, n_threads);
|
||||
if (!use_cpu) {
|
||||
#ifdef GGML_USE_METAL
|
||||
octx->backend = ggml_backend_metal_init();
|
||||
#endif
|
||||
}
|
||||
octx->backend_cpu = ggml_backend_cpu_init();
|
||||
octx->set_threads();
|
||||
octx->build_schedule();
|
||||
|
|
@ -153,9 +148,6 @@ orpheus_context * build_new_orpheus_context(orpheus_model * model, int n_threads
|
|||
void orpheus_runner::orpheus_kv_cache_init() {
|
||||
ggml_backend_buffer_type_t buft = nullptr;
|
||||
if (octx->backend != nullptr) {
|
||||
#ifdef GGML_USE_METAL
|
||||
buft = ggml_backend_metal_buffer_type();
|
||||
#endif
|
||||
} else {
|
||||
buft = ggml_backend_cpu_buffer_type();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -323,11 +323,6 @@ void parler_context::reset(int32_t n_output_heads) {
|
|||
|
||||
struct parler_context * build_new_parler_context(struct parler_tts_model * model, int n_threads, bool use_cpu) {
|
||||
parler_context * pctx = new parler_context(model, n_threads);
|
||||
if (!use_cpu) {
|
||||
#ifdef GGML_USE_METAL
|
||||
pctx->backend = ggml_backend_metal_init();
|
||||
#endif
|
||||
}
|
||||
pctx->eos_seen.reserve(model->n_output_heads);
|
||||
pctx->backend_cpu = ggml_backend_cpu_init();
|
||||
pctx->set_threads();
|
||||
|
|
@ -343,9 +338,6 @@ static bool parler_kv_cache_init(struct parler_kv_cache * cache, parler_tts_mode
|
|||
ggml_backend_buffer_type_t buft = nullptr;
|
||||
// This only really supports CPU or Metal for the time being.
|
||||
if (pctx->backend != nullptr) {
|
||||
#ifdef GGML_USE_METAL
|
||||
buft = ggml_backend_metal_buffer_type();
|
||||
#endif
|
||||
} else {
|
||||
buft = ggml_backend_cpu_buffer_type();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,9 +50,6 @@ void runner_context::set_threads() {
|
|||
void runner_context::build_schedule(size_t max_nodes) {
|
||||
backend_cpu_buffer = ggml_backend_cpu_buffer_type();
|
||||
if (backend != nullptr) {
|
||||
#ifdef GGML_USE_METAL
|
||||
backend_buffer = ggml_backend_metal_buffer_type();
|
||||
#endif
|
||||
std::vector<ggml_backend_buffer_type_t> bufs = {backend_buffer, backend_cpu_buffer};
|
||||
std::vector<ggml_backend_t> backs = {backend, backend_cpu};
|
||||
sched = ggml_backend_sched_new(backs.data(), bufs.data(), 2, max_nodes, false, false);
|
||||
|
|
@ -103,10 +100,6 @@ void tts_model::prep_buffers_and_context(bool cpu_only, float size_offset, uint3
|
|||
backend = ggml_backend_cpu_init();
|
||||
buffer = ggml_backend_cpu_buffer_type();
|
||||
} else {
|
||||
#ifdef GGML_USE_METAL
|
||||
backend = ggml_backend_metal_init();
|
||||
buffer = ggml_backend_metal_buffer_type();
|
||||
#endif
|
||||
// If no non-CPU backend or buffer type was set up (e.g. Metal support is not available), abort with an explanatory message.
|
||||
if (!backend || !buffer) {
|
||||
TTS_ABORT("'GGML_USE_METAL' is not defined either set the model to use CPU only or install ggml with metal support.");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue