From 97693e7e9785ca4a42e7f131f4b6d102d288dcf8 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 20 Jan 2024 11:52:39 +0800
Subject: [PATCH] increase pool buffers

---
 otherarch/ggml_v2-cuda-legacy.cu | 10 +++++-----
 otherarch/ggml_v3-cuda.cu        | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/otherarch/ggml_v2-cuda-legacy.cu b/otherarch/ggml_v2-cuda-legacy.cu
index e7053b764..26f6a7e08 100644
--- a/otherarch/ggml_v2-cuda-legacy.cu
+++ b/otherarch/ggml_v2-cuda-legacy.cu
@@ -385,7 +385,7 @@ static to_fp32_cuda_t ggml_v2_get_to_fp32_cuda(ggml_v2_type type) {
 }
 
 // buffer pool for cuda
-#define MAX_CUDA_BUFFERS 16
+#define MAX_CUDA_BUFFERS_V2 16
 
 struct scoped_spin_lock {
     std::atomic_flag& lock;
@@ -406,13 +406,13 @@ struct cuda_buffer {
     size_t size = 0;
 };
 
-static cuda_buffer g_cuda_buffer_pool[MAX_CUDA_BUFFERS];
+static cuda_buffer g_cuda_buffer_pool[MAX_CUDA_BUFFERS_V2];
 static std::atomic_flag g_cuda_pool_lock = ATOMIC_FLAG_INIT;
 
 static void * ggml_v2_cuda_pool_malloc(size_t size, size_t * actual_size) {
     scoped_spin_lock lock(g_cuda_pool_lock);
 
-    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+    for (int i = 0; i < MAX_CUDA_BUFFERS_V2; ++i) {
         cuda_buffer& b = g_cuda_buffer_pool[i];
         if (b.size >= size && b.ptr != nullptr) {
             void * ptr = b.ptr;
@@ -431,7 +431,7 @@ static void * ggml_v2_cuda_pool_malloc(size_t size, size_t * actual_size) {
 static void ggml_v2_cuda_pool_free(void * ptr, size_t size) {
     scoped_spin_lock lock(g_cuda_pool_lock);
 
-    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+    for (int i = 0; i < MAX_CUDA_BUFFERS_V2; ++i) {
         cuda_buffer& b = g_cuda_buffer_pool[i];
         if (b.ptr == nullptr) {
             b.ptr = ptr;
@@ -439,7 +439,7 @@ static void ggml_v2_cuda_pool_free(void * ptr, size_t size) {
             return;
         }
     }
-    fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS\n");
+    fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS_V2\n");
     CUDA_CHECK(cudaFree(ptr));
 }
 
diff --git a/otherarch/ggml_v3-cuda.cu b/otherarch/ggml_v3-cuda.cu
index 90f8e8c12..8a682683d 100644
--- a/otherarch/ggml_v3-cuda.cu
+++ b/otherarch/ggml_v3-cuda.cu
@@ -7254,7 +7254,7 @@ static void im2col_f32_f16_cuda(const float* x, half* dst,
 }
 
 // buffer pool for cuda
-#define MAX_CUDA_BUFFERS 256
+#define MAX_CUDA_BUFFERS_V3 512
 
 struct scoped_spin_lock {
     std::atomic_flag& lock;
@@ -7278,7 +7278,7 @@ struct ggml_v3_cuda_buffer {
     size_t size = 0;
 };
 
-static ggml_v3_cuda_buffer g_cuda_buffer_pool[GGML_V3_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS];
+static ggml_v3_cuda_buffer g_cuda_buffer_pool[GGML_V3_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS_V3];
 static size_t g_cuda_pool_size[GGML_V3_CUDA_MAX_DEVICES] = {0};
 
 static void * ggml_v3_cuda_pool_malloc_leg(int device, size_t size, size_t * actual_size) {
@@ -7289,7 +7289,7 @@ static void * ggml_v3_cuda_pool_malloc_leg(int device, size_t size, size_t * act
 
     int worst_i = -1;
     size_t worst_size = 0; //largest unused buffer seen so far
-    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+    for (int i = 0; i < MAX_CUDA_BUFFERS_V3; ++i) {
         ggml_v3_cuda_buffer& b = g_cuda_buffer_pool[device][i];
         if (b.size > 0 && b.size >= size && b.size < best_size)
         {
@@ -7336,7 +7336,7 @@ static void * ggml_v3_cuda_pool_malloc_leg(int device, size_t size, size_t * act
 static void ggml_v3_cuda_pool_free_leg(int device, void * ptr, size_t size) {
     scoped_spin_lock lock(g_cuda_pool_lock);
 
-    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+    for (int i = 0; i < MAX_CUDA_BUFFERS_V3; ++i) {
         ggml_v3_cuda_buffer& b = g_cuda_buffer_pool[device][i];
         if (b.ptr == nullptr) {
             b.ptr = ptr;
@@ -7344,7 +7344,7 @@ static void ggml_v3_cuda_pool_free_leg(int device, void * ptr, size_t size) {
             return;
         }
     }
-    fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS\n");
+    fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS_V3\n");
     ggml_v3_cuda_set_device(device);
     CUDA_CHECK(cudaFree(ptr));
     g_cuda_pool_size[device] -= size;