fix cuda and macos compile issues

This commit is contained in:
Concedo 2025-09-12 20:53:42 +08:00
parent 6463f5c26b
commit a5580a32fb
6 changed files with 6 additions and 31 deletions

View file

@@ -79,6 +79,8 @@ file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-mma*.cu")
list(APPEND GGML_SOURCES_CUDA ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
list(APPEND GGML_SOURCES_CUDA ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmf*.cu")
list(APPEND GGML_SOURCES_CUDA ${SRCS})
set(GGML_V3_CUDA_SOURCES otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h)
set(GGML_V2_CUDA_SOURCES otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h)
set(GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)
@@ -176,6 +178,8 @@ if (LLAMA_HIPBLAS)
list(APPEND GGML_SOURCES_ROCM ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
list(APPEND GGML_SOURCES_ROCM ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmf*.cu")
list(APPEND GGML_SOURCES_ROCM ${SRCS})
add_compile_definitions(GGML_USE_HIP GGML_USE_CUDA SD_USE_CUDA GGML_HIP_NO_VMM)
add_library(ggml-rocm ${GGML_SOURCES_CUDA})

View file

@@ -191,6 +191,7 @@ endif
# it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmf*.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))

View file

@@ -264,11 +264,6 @@ void dia_context::reset() {
struct dia_context * build_new_dia_context(struct dia_model * model, int n_threads, bool use_cpu) {
dia_context * dctx = new dia_context(model, n_threads);
if (!use_cpu) {
#ifdef GGML_USE_METAL
dctx->backend = ggml_backend_metal_init();
#endif
}
dctx->backend_cpu = ggml_backend_cpu_init();
dctx->set_threads();
dctx->build_schedule();
@@ -280,9 +275,7 @@ static bool dia_kv_cache_init(struct dia_kv_cache * cache, dia_model * model, di
ggml_backend_buffer_type_t buft = nullptr;
// this will only really support cpu or metal for the time being;
if (dctx->backend != nullptr) {
#ifdef GGML_USE_METAL
buft = ggml_backend_metal_buffer_type();
#endif
} else {
buft = ggml_backend_cpu_buffer_type();
}

View file

@@ -138,11 +138,6 @@ struct ggml_tensor * build_attn_mask(ggml_context * ctx, orpheus_context * octx,
orpheus_context * build_new_orpheus_context(orpheus_model * model, int n_threads, bool use_cpu) {
orpheus_context * octx = new orpheus_context(model, n_threads);
if (!use_cpu) {
#ifdef GGML_USE_METAL
octx->backend = ggml_backend_metal_init();
#endif
}
octx->backend_cpu = ggml_backend_cpu_init();
octx->set_threads();
octx->build_schedule();
@@ -153,9 +148,6 @@ orpheus_context * build_new_orpheus_context(orpheus_model * model, int n_threads
void orpheus_runner::orpheus_kv_cache_init() {
ggml_backend_buffer_type_t buft = nullptr;
if (octx->backend != nullptr) {
#ifdef GGML_USE_METAL
buft = ggml_backend_metal_buffer_type();
#endif
} else {
buft = ggml_backend_cpu_buffer_type();
}

View file

@@ -323,11 +323,6 @@ void parler_context::reset(int32_t n_output_heads) {
struct parler_context * build_new_parler_context(struct parler_tts_model * model, int n_threads, bool use_cpu) {
parler_context * pctx = new parler_context(model, n_threads);
if (!use_cpu) {
#ifdef GGML_USE_METAL
pctx->backend = ggml_backend_metal_init();
#endif
}
pctx->eos_seen.reserve(model->n_output_heads);
pctx->backend_cpu = ggml_backend_cpu_init();
pctx->set_threads();
@@ -343,9 +338,6 @@ static bool parler_kv_cache_init(struct parler_kv_cache * cache, parler_tts_mode
ggml_backend_buffer_type_t buft = nullptr;
// this will only really support cpu or metal for the time being;
if (pctx->backend != nullptr) {
#ifdef GGML_USE_METAL
buft = ggml_backend_metal_buffer_type();
#endif
} else {
buft = ggml_backend_cpu_buffer_type();
}

View file

@@ -50,9 +50,6 @@ void runner_context::set_threads() {
void runner_context::build_schedule(size_t max_nodes) {
backend_cpu_buffer = ggml_backend_cpu_buffer_type();
if (backend != nullptr) {
#ifdef GGML_USE_METAL
backend_buffer = ggml_backend_metal_buffer_type();
#endif
std::vector<ggml_backend_buffer_type_t> bufs = {backend_buffer, backend_cpu_buffer};
std::vector<ggml_backend_t> backs = {backend, backend_cpu};
sched = ggml_backend_sched_new(backs.data(), bufs.data(), 2, max_nodes, false, false);
@@ -103,10 +100,6 @@ void tts_model::prep_buffers_and_context(bool cpu_only, float size_offset, uint3
backend = ggml_backend_cpu_init();
buffer = ggml_backend_cpu_buffer_type();
} else {
#ifdef GGML_USE_METAL
backend = ggml_backend_metal_init();
buffer = ggml_backend_metal_buffer_type();
#endif
// if use metal is not installed then we need to warn here
if (!backend || !buffer) {
TTS_ABORT("'GGML_USE_METAL' is not defined either set the model to use CPU only or install ggml with metal support.");