From f12cc6d0fa96d6a3c33952f06b7439ac43a3c3fe Mon Sep 17 00:00:00 2001 From: Reese Levine Date: Wed, 27 May 2026 14:22:33 -0700 Subject: [PATCH] ggml-webgpu: remove legacy constants (#23672) --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index f6d17a073..1846886db 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -94,14 +94,6 @@ static inline uint32_t ggml_webgpu_u32_from_f32(float value) { #define WEBGPU_SET_ROWS_ERROR_BUF_SIZE_BYTES 4 #define WEBGPU_STORAGE_BUF_BINDING_MULT 4 // a storage buffer binding size must be a multiple of 4 -// For operations which process a row in parallel, this seems like a reasonable -// default -#define WEBGPU_ROW_SPLIT_WG_SIZE 64 - -// Track https://github.com/gpuweb/gpuweb/issues/5315 for fixes to -// implementations so this can be removed, necessary only for get_rows right now -#define WEBGPU_MAX_WG_SIZE 288 - /* End Constants */ // This is a "fake" base pointer, since WebGPU buffers do not have pointers to @@ -631,7 +623,7 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_global_context & ctx, size_t size) { std::vector params = { (uint32_t) offset, (uint32_t) size, value }; std::vector entries = { ggml_webgpu_make_bind_group_entry(0, buf, 0, buf.GetSize()) }; - size_t bytes_per_wg = WEBGPU_MAX_WG_SIZE * ctx->capabilities.memset_bytes_per_thread; + size_t bytes_per_wg = ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup * ctx->capabilities.memset_bytes_per_thread; uint32_t wg_x = CEIL_DIV(size + 3, bytes_per_wg); ctx->queue.WriteBuffer(ctx->memset_params_buf, 0, params.data(), params.size() * sizeof(uint32_t)); @@ -1366,7 +1358,7 @@ static webgpu_encoded_op ggml_webgpu_get_rows(webgpu_context & ctx, shader_lib_ctx.src0 = src; shader_lib_ctx.src1 = nullptr; shader_lib_ctx.dst = dst; - shader_lib_ctx.max_wg_size = WEBGPU_MAX_WG_SIZE; + shader_lib_ctx.max_wg_size = ctx->global_ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup; webgpu_pipeline pipeline = ctx->shader_lib->get_get_rows_pipeline(shader_lib_ctx); auto * decisions = static_cast(pipeline.context.get()); @@ -3716,13 +3708,13 @@ static ggml_guid_t ggml_backend_webgpu_guid(void) { static void ggml_webgpu_init_memset_pipeline(webgpu_global_context & ctx) { // we use the maximum workgroup size for the memset pipeline - size_t max_threads = WEBGPU_MAX_WG_SIZE * ctx->capabilities.limits.maxComputeWorkgroupsPerDimension; + size_t max_threads = ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup * ctx->capabilities.limits.maxComputeWorkgroupsPerDimension; // Size the bytes_per_thread so that the largest buffer size can be handled ctx->capabilities.memset_bytes_per_thread = CEIL_DIV(ctx->capabilities.limits.maxStorageBufferBindingSize, max_threads); std::vector constants(2); constants[0].key = "wg_size"; - constants[0].value = WEBGPU_MAX_WG_SIZE; + constants[0].value = ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup; constants[1].key = "bytes_per_thread"; constants[1].value = ctx->capabilities.memset_bytes_per_thread; ctx->memset_pipeline = ggml_webgpu_create_pipeline(ctx->device, wgsl_memset, "memset", constants);