diff --git a/common/profiler.cpp b/common/profiler.cpp
index 99be85cb..5efcf459 100644
--- a/common/profiler.cpp
+++ b/common/profiler.cpp
@@ -21,6 +21,7 @@
 
 #ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"
+#include <cuda_runtime.h>
 #endif
 
 #include
@@ -33,7 +34,6 @@
 #include
 #include
 #include
-#include <cuda_runtime.h>
 
 const char * device_name() {
     static char device_name[256];
@@ -522,10 +522,7 @@ float device_memory_bw(int n_thread) {
 }
 
 float device_cuda_memory_bw(struct llama_model * model) {
-#ifndef GGML_USE_CUDA
-    return 0.0f;
-#endif
-
+#ifdef GGML_USE_CUDA
     const int n_embd = llama_n_embd(model) * 2;
     std::vector<float> matrix_A(n_embd * n_embd, 1.0f);
 
@@ -581,6 +578,9 @@ float device_cuda_memory_bw(struct llama_model * model) {
     ggml_backend_free(backend);
 
     return bandwidth;
+#else
+    return 0.0f;
+#endif
 }
 
 int device_has_metal(void) {
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index a304ca55..73426a5d 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -17564,6 +17564,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
                 ggml_compute_forward_opt_step_adamw(params, tensor);
             }
             break;
+        case GGML_OP_READ:
         case GGML_OP_NONE:
             {
                 // nop
@@ -18719,6 +18720,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 GGML_ABORT("fatal error"); // not supported
             }
+        case GGML_OP_READ:
         case GGML_OP_NONE:
             {
                 // nop
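
Note on the common/profiler.cpp change: the old guard returned 0.0f early under #ifndef GGML_USE_CUDA, but the CUDA-dependent body after the #endif was still compiled in CPU-only builds, which is exactly where it breaks. Moving the CUDA header inside the #ifdef and wrapping the whole body in #ifdef / #else / #endif keeps every CUDA symbol out of non-CUDA builds. Below is a minimal, self-contained sketch of that pattern; probe_cuda_bw() and the file name are hypothetical stand-ins, not the real profiler code.

    /* guard_sketch.c -- hypothetical sketch of the #ifdef/#else guard above.
     * Build both ways to exercise the two paths:
     *   cc guard_sketch.c && ./a.out
     *   cc -DGGML_USE_CUDA guard_sketch.c && ./a.out
     */
    #include <stdio.h>

    #ifdef GGML_USE_CUDA
    /* Stand-in for the CUDA probe; in profiler.cpp this path uses
     * CUDA-only APIs that do not exist in CPU-only builds. */
    static float probe_cuda_bw(void) { return 42.0f; }
    #endif

    static float device_cuda_memory_bw_sketch(void) {
    #ifdef GGML_USE_CUDA
        return probe_cuda_bw();  /* compiled only when CUDA is enabled */
    #else
        return 0.0f;             /* CPU-only build: no CUDA symbols at all */
    #endif
    }

    int main(void) {
        printf("bw = %.1f GB/s\n", device_cuda_memory_bw_sketch());
        return 0;
    }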
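
Note on the ggml/src/ggml.c change: GGML_OP_READ is added to both dispatch switches as a fall-through into the existing GGML_OP_NONE branch, so the CPU backend treats it as an op with no forward or backward work rather than an unhandled case. A minimal sketch of that fall-through-to-nop dispatch follows; the enum and function names are hypothetical stand-ins for ggml's real dispatchers.

    /* dispatch_sketch.c -- hypothetical sketch of the fall-through nop case. */
    #include <stdio.h>

    enum sketch_op { SKETCH_OP_ADD, SKETCH_OP_READ, SKETCH_OP_NONE };

    static void sketch_compute_forward(enum sketch_op op) {
        switch (op) {
            case SKETCH_OP_ADD:
                {
                    printf("computing add\n");
                }
                break;
            case SKETCH_OP_READ:  /* falls through: no CPU work needed */
            case SKETCH_OP_NONE:
                {
                    /* nop */
                } break;
        }
    }

    int main(void) {
        sketch_compute_forward(SKETCH_OP_READ);  /* prints nothing: a nop */
        sketch_compute_forward(SKETCH_OP_ADD);   /* prints "computing add" */
        return 0;
    }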