From 97693e7e9785ca4a42e7f131f4b6d102d288dcf8 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 20 Jan 2024 11:52:39 +0800
Subject: [PATCH] increase pool buffers

---
 otherarch/ggml_v2-cuda-legacy.cu | 10 +++++-----
 otherarch/ggml_v3-cuda.cu        | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/otherarch/ggml_v2-cuda-legacy.cu b/otherarch/ggml_v2-cuda-legacy.cu
index e7053b764..26f6a7e08 100644
--- a/otherarch/ggml_v2-cuda-legacy.cu
+++ b/otherarch/ggml_v2-cuda-legacy.cu
@@ -385,7 +385,7 @@ static to_fp32_cuda_t ggml_v2_get_to_fp32_cuda(ggml_v2_type type) {
 }
 
 // buffer pool for cuda
-#define MAX_CUDA_BUFFERS 16
+#define MAX_CUDA_BUFFERS_V2 16
 
 struct scoped_spin_lock {
     std::atomic_flag& lock;
@@ -406,13 +406,13 @@ struct cuda_buffer {
     size_t size = 0;
 };
 
-static cuda_buffer g_cuda_buffer_pool[MAX_CUDA_BUFFERS];
+static cuda_buffer g_cuda_buffer_pool[MAX_CUDA_BUFFERS_V2];
 static std::atomic_flag g_cuda_pool_lock = ATOMIC_FLAG_INIT;
 
 static void * ggml_v2_cuda_pool_malloc(size_t size, size_t * actual_size) {
     scoped_spin_lock lock(g_cuda_pool_lock);
 
-    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+    for (int i = 0; i < MAX_CUDA_BUFFERS_V2; ++i) {
         cuda_buffer& b = g_cuda_buffer_pool[i];
         if (b.size >= size && b.ptr != nullptr) {
             void * ptr = b.ptr;
@@ -431,7 +431,7 @@ static void * ggml_v2_cuda_pool_malloc(size_t size, size_t * actual_size) {
 static void ggml_v2_cuda_pool_free(void * ptr, size_t size) {
     scoped_spin_lock lock(g_cuda_pool_lock);
 
-    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+    for (int i = 0; i < MAX_CUDA_BUFFERS_V2; ++i) {
         cuda_buffer& b = g_cuda_buffer_pool[i];
         if (b.ptr == nullptr) {
             b.ptr = ptr;
@@ -439,7 +439,7 @@ static void ggml_v2_cuda_pool_free(void * ptr, size_t size) {
             return;
         }
     }
-    fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS\n");
+    fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS_V2\n");
     CUDA_CHECK(cudaFree(ptr));
 }
 
diff --git a/otherarch/ggml_v3-cuda.cu b/otherarch/ggml_v3-cuda.cu
index 90f8e8c12..8a682683d 100644
--- a/otherarch/ggml_v3-cuda.cu
+++ b/otherarch/ggml_v3-cuda.cu
@@ -7254,7 +7254,7 @@ static void im2col_f32_f16_cuda(const float* x, half* dst,
 }
 
 // buffer pool for cuda
-#define MAX_CUDA_BUFFERS 256
+#define MAX_CUDA_BUFFERS_V3 512
 
 struct scoped_spin_lock {
     std::atomic_flag& lock;
@@ -7278,7 +7278,7 @@ struct ggml_v3_cuda_buffer {
     size_t size = 0;
 };
 
-static ggml_v3_cuda_buffer g_cuda_buffer_pool[GGML_V3_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS];
+static ggml_v3_cuda_buffer g_cuda_buffer_pool[GGML_V3_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS_V3];
 static size_t g_cuda_pool_size[GGML_V3_CUDA_MAX_DEVICES] = {0};
 
 static void * ggml_v3_cuda_pool_malloc_leg(int device, size_t size, size_t * actual_size) {
@@ -7289,7 +7289,7 @@ static void * ggml_v3_cuda_pool_malloc_leg(int device, size_t size, size_t * act
 
     int worst_i = -1;
     size_t worst_size = 0; //largest unused buffer seen so far
-    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+    for (int i = 0; i < MAX_CUDA_BUFFERS_V3; ++i) {
         ggml_v3_cuda_buffer& b = g_cuda_buffer_pool[device][i];
         if (b.size > 0 && b.size >= size && b.size < best_size)
         {
@@ -7336,7 +7336,7 @@ static void * ggml_v3_cuda_pool_malloc_leg(int device, size_t size, size_t * act
 static void ggml_v3_cuda_pool_free_leg(int device, void * ptr, size_t size) {
     scoped_spin_lock lock(g_cuda_pool_lock);
 
-    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+    for (int i = 0; i < MAX_CUDA_BUFFERS_V3; ++i) {
         ggml_v3_cuda_buffer& b = g_cuda_buffer_pool[device][i];
         if (b.ptr == nullptr) {
             b.ptr = ptr;
@@ -7344,7 +7344,7 @@ static void ggml_v3_cuda_pool_free_leg(int device, void * ptr, size_t size) {
             return;
         }
     }
-    fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS\n");
+    fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS_V3\n");
     ggml_v3_cuda_set_device(device);
     CUDA_CHECK(cudaFree(ptr));
     g_cuda_pool_size[device] -= size;