ggml-webgpu: remove legacy constants (#23672)

This commit is contained in:
Reese Levine 2026-05-27 14:22:33 -07:00 committed by GitHub
parent aa50b2c2ae
commit f12cc6d0fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -94,14 +94,6 @@ static inline uint32_t ggml_webgpu_u32_from_f32(float value) {
#define WEBGPU_SET_ROWS_ERROR_BUF_SIZE_BYTES 4
#define WEBGPU_STORAGE_BUF_BINDING_MULT 4 // a storage buffer binding size must be a multiple of 4
// For operations which process a row in parallel, this seems like a reasonable
// default
#define WEBGPU_ROW_SPLIT_WG_SIZE 64
// Track https://github.com/gpuweb/gpuweb/issues/5315 for fixes to
// implementations so this can be removed, necessary only for get_rows right now
#define WEBGPU_MAX_WG_SIZE 288
/* End Constants */
// This is a "fake" base pointer, since WebGPU buffers do not have pointers to
@@ -631,7 +623,7 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_global_context & ctx,
size_t size) {
std::vector<uint32_t> params = { (uint32_t) offset, (uint32_t) size, value };
std::vector<wgpu::BindGroupEntry> entries = { ggml_webgpu_make_bind_group_entry(0, buf, 0, buf.GetSize()) };
size_t bytes_per_wg = WEBGPU_MAX_WG_SIZE * ctx->capabilities.memset_bytes_per_thread;
size_t bytes_per_wg = ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup * ctx->capabilities.memset_bytes_per_thread;
uint32_t wg_x = CEIL_DIV(size + 3, bytes_per_wg);
ctx->queue.WriteBuffer(ctx->memset_params_buf, 0, params.data(), params.size() * sizeof(uint32_t));
@@ -1366,7 +1358,7 @@ static webgpu_encoded_op ggml_webgpu_get_rows(webgpu_context & ctx,
shader_lib_ctx.src0 = src;
shader_lib_ctx.src1 = nullptr;
shader_lib_ctx.dst = dst;
shader_lib_ctx.max_wg_size = WEBGPU_MAX_WG_SIZE;
shader_lib_ctx.max_wg_size = ctx->global_ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup;
webgpu_pipeline pipeline = ctx->shader_lib->get_get_rows_pipeline(shader_lib_ctx);
auto * decisions = static_cast<ggml_webgpu_generic_shader_decisions *>(pipeline.context.get());
@@ -3716,13 +3708,13 @@ static ggml_guid_t ggml_backend_webgpu_guid(void) {
static void ggml_webgpu_init_memset_pipeline(webgpu_global_context & ctx) {
// we use the maximum workgroup size for the memset pipeline
size_t max_threads = WEBGPU_MAX_WG_SIZE * ctx->capabilities.limits.maxComputeWorkgroupsPerDimension;
size_t max_threads = ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup * ctx->capabilities.limits.maxComputeWorkgroupsPerDimension;
// Size the bytes_per_thread so that the largest buffer size can be handled
ctx->capabilities.memset_bytes_per_thread =
CEIL_DIV(ctx->capabilities.limits.maxStorageBufferBindingSize, max_threads);
std::vector<wgpu::ConstantEntry> constants(2);
constants[0].key = "wg_size";
constants[0].value = WEBGPU_MAX_WG_SIZE;
constants[0].value = ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup;
constants[1].key = "bytes_per_thread";
constants[1].value = ctx->capabilities.memset_bytes_per_thread;
ctx->memset_pipeline = ggml_webgpu_create_pipeline(ctx->device, wgsl_memset, "memset", constants);