TP: fix entirely zero-sized slices per device (#23525)

This commit is contained in:
Johannes Gäßler 2026-05-24 08:19:33 +02:00 committed by GitHub
parent f3061116ff
commit fff63b5108
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 35 additions and 2 deletions

View file

@ -76,6 +76,7 @@ GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_i
// Utils
// Create a buffer and allocate all the tensors in a ggml_context
// ggml_backend_alloc_ctx_tensors_from_buft_size returns the size of the buffer that would be allocated by ggml_backend_alloc_ctx_tensors_from_buft
// ggml_backend_alloc_ctx_tensors_from_buft returns NULL on failure or if all tensors in ctx are already allocated or zero-sized
GGML_API size_t ggml_backend_alloc_ctx_tensors_from_buft_size(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);

View file

@ -1275,6 +1275,9 @@ static void ggml_backend_meta_buffer_set_tensor(ggml_backend_buffer_t buffer, gg
for (size_t j = 0; j < n_bufs; j++) {
ggml_tensor * simple_tensor = ggml_backend_meta_buffer_simple_tensor(tensor, j);
const size_t chunk_size_j = simple_tensor->nb[split_state.axis + 1];
if (chunk_size_j == 0) {
continue;
}
const size_t simple_offset = i_start * chunk_size_j;
ggml_backend_tensor_set_2d(simple_tensor, (const char *) data + offset_j, simple_offset, chunk_size_j, i_stop - i_start, chunk_size_j, chunk_size_full);
offset_j += chunk_size_j;
@ -1382,6 +1385,9 @@ static void ggml_backend_meta_buffer_get_tensor(ggml_backend_buffer_t buffer, co
for (size_t j = 0; j < n_bufs; j++){
const ggml_tensor * simple_tensor = ggml_backend_meta_buffer_simple_tensor(tensor, j);
const size_t chunk_size_j = simple_tensor->nb[split_state.axis + 1];
if (chunk_size_j == 0) {
continue;
}
const size_t simple_offset = i_start * chunk_size_j;
ggml_backend_tensor_get_2d(simple_tensor, (char *) data + offset_j, simple_offset, chunk_size_j, i_stop - i_start, chunk_size_j, chunk_size_full);
offset_j += chunk_size_j;
@ -1445,6 +1451,7 @@ static ggml_backend_buffer_t ggml_backend_meta_buffer_type_alloc_buffer(ggml_bac
buf_ctx->buf_configs.reserve(n_simple_bufts);
for (size_t i = 0; i < n_simple_bufts; i++) {
ggml_backend_buffer_t simple_buf = ggml_backend_buft_alloc_buffer(ggml_backend_meta_buft_simple_buft(buft, i), size);
GGML_ASSERT(simple_buf != nullptr);
max_size = std::max(max_size, ggml_backend_buffer_get_size(simple_buf));
buf_ctx->buf_configs.emplace_back(ggml_init(params), simple_buf);
}
@ -1474,8 +1481,27 @@ struct ggml_backend_buffer * ggml_backend_meta_alloc_ctx_tensors_from_buft(struc
t->data = (void *) 0x2000000000000000; // FIXME
}
for (size_t i = 0; i < n_simple_bufts; i++) {
meta_buf_ctx->buf_configs[i].buf = ggml_backend_alloc_ctx_tensors_from_buft(
meta_buf_ctx->buf_configs[i].ctx, ggml_backend_meta_buft_simple_buft(buft, i));
ggml_context * ctx = meta_buf_ctx->buf_configs[i].ctx;
ggml_backend_buffer_type_t simple_buft = ggml_backend_meta_buft_simple_buft(buft, i);
// If a ggml_context only has zero-sized tensors, ggml_backend_alloc_ctx_tensors_from_buft returns NULL.
// For those edge cases, allocate a dummy buffer instead.
bool any_nonzero_slice = false;
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
if (ggml_nelements(t) != 0) {
any_nonzero_slice = true;
break;
}
}
if (any_nonzero_slice) {
meta_buf_ctx->buf_configs[i].buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, simple_buft);
} else {
meta_buf_ctx->buf_configs[i].buf = ggml_backend_buft_alloc_buffer(simple_buft, 0);
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
t->buffer = meta_buf_ctx->buf_configs[i].buf;
}
}
GGML_ASSERT(meta_buf_ctx->buf_configs[i].buf != nullptr);
meta_buf->size = std::max(meta_buf->size, ggml_backend_buffer_get_size(meta_buf_ctx->buf_configs[i].buf));
}
return meta_buf;
@ -1605,6 +1631,9 @@ static void ggml_backend_meta_set_tensor_async(ggml_backend_t backend, ggml_tens
ggml_backend_t simple_backend = ggml_backend_meta_simple_backend(backend, j);
ggml_tensor * simple_tensor = ggml_backend_meta_buffer_simple_tensor(tensor, j);
const size_t chunk_size_j = simple_tensor->nb[split_state.axis + 1];
if (chunk_size_j == 0) {
continue;
}
ggml_backend_tensor_set_2d_async(simple_backend, simple_tensor, (const char *) data + offset_j, offset, chunk_size_j,
i_stop - i_start, chunk_size_j, chunk_size_full);
offset_j += chunk_size_j;
@ -1646,6 +1675,9 @@ static void ggml_backend_meta_get_tensor_async(ggml_backend_t backend, const ggm
ggml_backend_t simple_backend = ggml_backend_meta_simple_backend(backend, j);
const ggml_tensor * simple_tensor = ggml_backend_meta_buffer_simple_tensor(tensor, j);
const size_t chunk_size_j = simple_tensor->nb[split_state.axis + 1];
if (chunk_size_j == 0) {
continue;
}
ggml_backend_tensor_get_2d_async(simple_backend, simple_tensor, (char *) data + offset_j, offset, chunk_size_j,
i_stop - i_start, chunk_size_j, chunk_size_full);
offset_j += chunk_size_j;