fix cuda and macos compile issues

This commit is contained in:
Concedo 2025-09-12 20:53:42 +08:00
parent 6463f5c26b
commit a5580a32fb
6 changed files with 6 additions and 31 deletions

View file

@@ -79,6 +79,8 @@ file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-mma*.cu")
list(APPEND GGML_SOURCES_CUDA ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
list(APPEND GGML_SOURCES_CUDA ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmf*.cu")
list(APPEND GGML_SOURCES_CUDA ${SRCS})
set(GGML_V3_CUDA_SOURCES otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h)
set(GGML_V2_CUDA_SOURCES otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h)
set(GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)
@@ -176,6 +178,8 @@ if (LLAMA_HIPBLAS)
list(APPEND GGML_SOURCES_ROCM ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
list(APPEND GGML_SOURCES_ROCM ${SRCS})
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmf*.cu")
list(APPEND GGML_SOURCES_ROCM ${SRCS})
add_compile_definitions(GGML_USE_HIP GGML_USE_CUDA SD_USE_CUDA GGML_HIP_NO_VMM)
add_library(ggml-rocm ${GGML_SOURCES_CUDA})

View file

@@ -191,6 +191,7 @@ endif
# it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmf*.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))

View file

@@ -264,11 +264,6 @@ void dia_context::reset() {
struct dia_context * build_new_dia_context(struct dia_model * model, int n_threads, bool use_cpu) {
dia_context * dctx = new dia_context(model, n_threads);
if (!use_cpu) {
#ifdef GGML_USE_METAL
dctx->backend = ggml_backend_metal_init();
#endif
}
dctx->backend_cpu = ggml_backend_cpu_init();
dctx->set_threads();
dctx->build_schedule();
@@ -280,9 +275,7 @@ static bool dia_kv_cache_init(struct dia_kv_cache * cache, dia_model * model, di
ggml_backend_buffer_type_t buft = nullptr;
// this will only really support cpu or metal for the time being;
if (dctx->backend != nullptr) {
#ifdef GGML_USE_METAL
buft = ggml_backend_metal_buffer_type();
#endif
} else {
buft = ggml_backend_cpu_buffer_type();
}

View file

@@ -138,11 +138,6 @@ struct ggml_tensor * build_attn_mask(ggml_context * ctx, orpheus_context * octx,
orpheus_context * build_new_orpheus_context(orpheus_model * model, int n_threads, bool use_cpu) {
orpheus_context * octx = new orpheus_context(model, n_threads);
if (!use_cpu) {
#ifdef GGML_USE_METAL
octx->backend = ggml_backend_metal_init();
#endif
}
octx->backend_cpu = ggml_backend_cpu_init();
octx->set_threads();
octx->build_schedule();
@@ -153,9 +148,6 @@ orpheus_context * build_new_orpheus_context(orpheus_model * model, int n_threads
void orpheus_runner::orpheus_kv_cache_init() {
ggml_backend_buffer_type_t buft = nullptr;
if (octx->backend != nullptr) {
#ifdef GGML_USE_METAL
buft = ggml_backend_metal_buffer_type();
#endif
} else {
buft = ggml_backend_cpu_buffer_type();
}

View file

@@ -323,11 +323,6 @@ void parler_context::reset(int32_t n_output_heads) {
struct parler_context * build_new_parler_context(struct parler_tts_model * model, int n_threads, bool use_cpu) {
parler_context * pctx = new parler_context(model, n_threads);
if (!use_cpu) {
#ifdef GGML_USE_METAL
pctx->backend = ggml_backend_metal_init();
#endif
}
pctx->eos_seen.reserve(model->n_output_heads);
pctx->backend_cpu = ggml_backend_cpu_init();
pctx->set_threads();
@@ -343,9 +338,6 @@ static bool parler_kv_cache_init(struct parler_kv_cache * cache, parler_tts_mode
ggml_backend_buffer_type_t buft = nullptr;
// this will only really support cpu or metal for the time being;
if (pctx->backend != nullptr) {
#ifdef GGML_USE_METAL
buft = ggml_backend_metal_buffer_type();
#endif
} else {
buft = ggml_backend_cpu_buffer_type();
}

View file

@@ -50,9 +50,6 @@ void runner_context::set_threads() {
void runner_context::build_schedule(size_t max_nodes) {
backend_cpu_buffer = ggml_backend_cpu_buffer_type();
if (backend != nullptr) {
#ifdef GGML_USE_METAL
backend_buffer = ggml_backend_metal_buffer_type();
#endif
std::vector<ggml_backend_buffer_type_t> bufs = {backend_buffer, backend_cpu_buffer};
std::vector<ggml_backend_t> backs = {backend, backend_cpu};
sched = ggml_backend_sched_new(backs.data(), bufs.data(), 2, max_nodes, false, false);
@@ -103,10 +100,6 @@ void tts_model::prep_buffers_and_context(bool cpu_only, float size_offset, uint3
backend = ggml_backend_cpu_init();
buffer = ggml_backend_cpu_buffer_type();
} else {
#ifdef GGML_USE_METAL
backend = ggml_backend_metal_init();
buffer = ggml_backend_metal_buffer_type();
#endif
// if use metal is not installed then we need to warn here
if (!backend || !buffer) {
TTS_ABORT("'GGML_USE_METAL' is not defined either set the model to use CPU only or install ggml with metal support.");