mirror of https://github.com/Lizonghang/prima.cpp.git (synced 2025-09-08 03:49:02 +00:00)
commit 0a91ad3edc
parent 3f008f2ad9

    fix cuda compatibility errors

2 changed files with 7 additions and 5 deletions
@@ -21,6 +21,7 @@
 #ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"
+#include <cuda_runtime.h>
 #endif

 #include <cmath>
@@ -33,7 +34,6 @@
 #include <vector>
 #include <inttypes.h>
 #include <thread>
-#include <cuda_runtime.h>

 const char * device_name() {
     static char device_name[256];
@@ -522,10 +522,7 @@ float device_memory_bw(int n_thread) {
 }

 float device_cuda_memory_bw(struct llama_model * model) {
-#ifndef GGML_USE_CUDA
-    return 0.0f;
-#endif
-
+#ifdef GGML_USE_CUDA
     const int n_embd = llama_n_embd(model) * 2;
     std::vector<float> matrix_A(n_embd * n_embd, 1.0f);

@@ -581,6 +578,9 @@ float device_cuda_memory_bw(struct llama_model * model) {
     ggml_backend_free(backend);

     return bandwidth;
+#else
+    return 0.0f;
+#endif
 }

 int device_has_metal(void) {
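The likely substance of this first change: the old code tried to bail out of device_cuda_memory_bw() early with #ifndef GGML_USE_CUDA ... #endif, but everything after that #endif, plus the unconditional #include <cuda_runtime.h>, was still compiled in non-CUDA builds, which is what broke them. The fix moves the CUDA header behind the GGML_USE_CUDA guard and restructures the function as a single #ifdef / #else / #endif, so exactly one branch is ever compiled. Below is a minimal sketch of the same pattern; probe_cuda_bandwidth() is hypothetical and only stands in for the real profiler function.

// Sketch only: the #ifdef / #else / #endif guard pattern from the commit,
// applied to a toy function. probe_cuda_bandwidth is hypothetical.

#ifdef GGML_USE_CUDA
#include <cuda_runtime.h>   // only pulled in when CUDA is enabled
#endif

#include <cstdio>

static float probe_cuda_bandwidth(void) {
#ifdef GGML_USE_CUDA
    // CUDA-only branch: compiled out entirely when GGML_USE_CUDA is unset,
    // so non-CUDA builds never see cudaGetDeviceCount or cuda_runtime.h.
    int n_devices = 0;
    if (cudaGetDeviceCount(&n_devices) != cudaSuccess || n_devices == 0) {
        return 0.0f;
    }
    return 1.0f;   // placeholder for a real bandwidth measurement
#else
    // Fallback branch: keeps the symbol defined so callers always link.
    return 0.0f;
#endif
}

int main(void) {
    std::printf("bw: %.1f\n", probe_cuda_bandwidth());
    return 0;
}

The key point is that an early return under #ifndef does not stop the compiler from parsing the CUDA code after the #endif; only putting that code in the other arm of an #ifdef / #else does.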
@@ -17564,6 +17564,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
                 ggml_compute_forward_opt_step_adamw(params, tensor);
             }
             break;
+        case GGML_OP_READ:
         case GGML_OP_NONE:
             {
                 // nop
@@ -18719,6 +18720,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 GGML_ABORT("fatal error"); // not supported
             }
+        case GGML_OP_READ:
         case GGML_OP_NONE:
             {
                 // nop
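The second file's change is a dispatch fix rather than a CUDA fix: GGML_OP_READ is added to both ggml_compute_forward and ggml_compute_backward as a case that falls through to the existing GGML_OP_NONE no-op, so graphs containing that op pass through both passes without work or an abort. A minimal sketch of the fallthrough pattern follows; the my_op enum and compute_forward handler are made up for illustration.

// Sketch only: a new enum value made a no-op by sharing the nop case.
#include <cstdio>

enum my_op { MY_OP_ADD, MY_OP_READ, MY_OP_NONE };

static void compute_forward(enum my_op op) {
    switch (op) {
        case MY_OP_ADD:
            {
                std::printf("add\n");
            } break;
        case MY_OP_READ:   // new op: deliberately falls through to the nop body
        case MY_OP_NONE:
            {
                // nop
            } break;
    }
}

int main(void) {
    compute_forward(MY_OP_READ);   // hits the shared nop branch, no abort
    return 0;
}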