increase pool buffers

This commit is contained in:
Concedo 2024-01-20 11:52:39 +08:00
parent 21f0ce2502
commit 97693e7e97
2 changed files with 10 additions and 10 deletions

View file

@@ -385,7 +385,7 @@ static to_fp32_cuda_t ggml_v2_get_to_fp32_cuda(ggml_v2_type type) {
}
// buffer pool for cuda
-#define MAX_CUDA_BUFFERS 16
+#define MAX_CUDA_BUFFERS_V2 16
struct scoped_spin_lock {
std::atomic_flag& lock;
@@ -406,13 +406,13 @@ struct cuda_buffer {
size_t size = 0;
};
-static cuda_buffer g_cuda_buffer_pool[MAX_CUDA_BUFFERS];
+static cuda_buffer g_cuda_buffer_pool[MAX_CUDA_BUFFERS_V2];
static std::atomic_flag g_cuda_pool_lock = ATOMIC_FLAG_INIT;
static void * ggml_v2_cuda_pool_malloc(size_t size, size_t * actual_size) {
scoped_spin_lock lock(g_cuda_pool_lock);
-for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+for (int i = 0; i < MAX_CUDA_BUFFERS_V2; ++i) {
cuda_buffer& b = g_cuda_buffer_pool[i];
if (b.size >= size && b.ptr != nullptr) {
void * ptr = b.ptr;
@@ -431,7 +431,7 @@ static void * ggml_v2_cuda_pool_malloc(size_t size, size_t * actual_size) {
static void ggml_v2_cuda_pool_free(void * ptr, size_t size) {
scoped_spin_lock lock(g_cuda_pool_lock);
-for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+for (int i = 0; i < MAX_CUDA_BUFFERS_V2; ++i) {
cuda_buffer& b = g_cuda_buffer_pool[i];
if (b.ptr == nullptr) {
b.ptr = ptr;
@@ -439,7 +439,7 @@ static void ggml_v2_cuda_pool_free(void * ptr, size_t size) {
return;
}
}
-fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS\n");
+fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS_V2\n");
CUDA_CHECK(cudaFree(ptr));
}

View file

@@ -7254,7 +7254,7 @@ static void im2col_f32_f16_cuda(const float* x, half* dst,
}
// buffer pool for cuda
-#define MAX_CUDA_BUFFERS 256
+#define MAX_CUDA_BUFFERS_V3 512
struct scoped_spin_lock {
std::atomic_flag& lock;
@@ -7278,7 +7278,7 @@ struct ggml_v3_cuda_buffer {
size_t size = 0;
};
-static ggml_v3_cuda_buffer g_cuda_buffer_pool[GGML_V3_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS];
+static ggml_v3_cuda_buffer g_cuda_buffer_pool[GGML_V3_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS_V3];
static size_t g_cuda_pool_size[GGML_V3_CUDA_MAX_DEVICES] = {0};
static void * ggml_v3_cuda_pool_malloc_leg(int device, size_t size, size_t * actual_size) {
@@ -7289,7 +7289,7 @@ static void * ggml_v3_cuda_pool_malloc_leg(int device, size_t size, size_t * act
int worst_i = -1;
size_t worst_size = 0; //largest unused buffer seen so far
-for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+for (int i = 0; i < MAX_CUDA_BUFFERS_V3; ++i) {
ggml_v3_cuda_buffer& b = g_cuda_buffer_pool[device][i];
if (b.size > 0 && b.size >= size && b.size < best_size)
{
@@ -7336,7 +7336,7 @@ static void * ggml_v3_cuda_pool_malloc_leg(int device, size_t size, size_t * act
static void ggml_v3_cuda_pool_free_leg(int device, void * ptr, size_t size) {
scoped_spin_lock lock(g_cuda_pool_lock);
-for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
+for (int i = 0; i < MAX_CUDA_BUFFERS_V3; ++i) {
ggml_v3_cuda_buffer& b = g_cuda_buffer_pool[device][i];
if (b.ptr == nullptr) {
b.ptr = ptr;
@@ -7344,7 +7344,7 @@ static void ggml_v3_cuda_pool_free_leg(int device, void * ptr, size_t size) {
return;
}
}
-fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS\n");
+fprintf(stderr, "WARNING: cuda buffer pool full, increase MAX_CUDA_BUFFERS_V3\n");
ggml_v3_cuda_set_device(device);
CUDA_CHECK(cudaFree(ptr));
g_cuda_pool_size[device] -= size;