diff --git a/common/profiler.cpp b/common/profiler.cpp
index 99be85cb..5efcf459 100644
--- a/common/profiler.cpp
+++ b/common/profiler.cpp
@@ -21,6 +21,7 @@
 
 #ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"
+#include <cuda_runtime.h>
 #endif
 
 #include
@@ -33,7 +34,6 @@
 #include
 #include
 #include
-#include <cuda_runtime.h>
 
 const char * device_name() {
     static char device_name[256];
@@ -522,10 +522,7 @@ float device_memory_bw(int n_thread) {
 }
 
 float device_cuda_memory_bw(struct llama_model * model) {
-#ifndef GGML_USE_CUDA
-    return 0.0f;
-#endif
-
+#ifdef GGML_USE_CUDA
     const int n_embd = llama_n_embd(model) * 2;
     std::vector<float> matrix_A(n_embd * n_embd, 1.0f);
 
@@ -581,6 +578,9 @@ float device_cuda_memory_bw(struct llama_model * model) {
     ggml_backend_free(backend);
 
     return bandwidth;
+#else
+    return 0.0f;
+#endif
 }
 
 int device_has_metal(void) {
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index a304ca55..73426a5d 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -17564,6 +17564,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
                 ggml_compute_forward_opt_step_adamw(params, tensor);
             }
             break;
+        case GGML_OP_READ:
         case GGML_OP_NONE:
             {
                 // nop
@@ -18719,6 +18720,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 GGML_ABORT("fatal error"); // not supported
             }
+        case GGML_OP_READ:
         case GGML_OP_NONE:
             {
                 // nop
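
Note on the common/profiler.cpp change: the old guard returned 0.0f early under #ifndef GGML_USE_CUDA, but the CUDA-dependent body after the #endif was still compiled in CPU-only builds, which is exactly where it breaks. Moving the CUDA header inside the #ifdef and wrapping the whole body in #ifdef / #else / #endif keeps every CUDA symbol out of non-CUDA builds. Below is a minimal, self-contained sketch of that pattern; probe_cuda_bw() and the file name are hypothetical stand-ins, not the real profiler code.

    /* guard_sketch.c -- hypothetical sketch of the #ifdef/#else guard above.
     * Build both ways to exercise the two paths:
     *   cc guard_sketch.c && ./a.out
     *   cc -DGGML_USE_CUDA guard_sketch.c && ./a.out
     */
    #include <stdio.h>

    #ifdef GGML_USE_CUDA
    /* Stand-in for the CUDA probe; in profiler.cpp this path uses
     * CUDA-only APIs that do not exist in CPU-only builds. */
    static float probe_cuda_bw(void) { return 42.0f; }
    #endif

    static float device_cuda_memory_bw_sketch(void) {
    #ifdef GGML_USE_CUDA
        return probe_cuda_bw();  /* compiled only when CUDA is enabled */
    #else
        return 0.0f;             /* CPU-only build: no CUDA symbols at all */
    #endif
    }

    int main(void) {
        printf("bw = %.1f GB/s\n", device_cuda_memory_bw_sketch());
        return 0;
    }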
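
Note on the ggml/src/ggml.c change: GGML_OP_READ is added to both dispatch switches as a fall-through into the existing GGML_OP_NONE branch, so the CPU backend treats it as an op with no forward or backward work rather than an unhandled case. A minimal sketch of that fall-through-to-nop dispatch follows; the enum and function names are hypothetical stand-ins for ggml's real dispatchers.

    /* dispatch_sketch.c -- hypothetical sketch of the fall-through nop case. */
    #include <stdio.h>

    enum sketch_op { SKETCH_OP_ADD, SKETCH_OP_READ, SKETCH_OP_NONE };

    static void sketch_compute_forward(enum sketch_op op) {
        switch (op) {
            case SKETCH_OP_ADD:
                {
                    printf("computing add\n");
                }
                break;
            case SKETCH_OP_READ:  /* falls through: no CPU work needed */
            case SKETCH_OP_NONE:
                {
                    /* nop */
                } break;
        }
    }

    int main(void) {
        sketch_compute_forward(SKETCH_OP_READ);  /* prints nothing: a nop */
        sketch_compute_forward(SKETCH_OP_ADD);   /* prints "computing add" */
        return 0;
    }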