mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-30 12:03:38 +00:00
ggml-webgpu: remove legacy constants (#23672)
This commit is contained in:
parent
aa50b2c2ae
commit
f12cc6d0fa
1 changed files with 4 additions and 12 deletions
|
|
@ -94,14 +94,6 @@ static inline uint32_t ggml_webgpu_u32_from_f32(float value) {
|
|||
#define WEBGPU_SET_ROWS_ERROR_BUF_SIZE_BYTES 4
|
||||
#define WEBGPU_STORAGE_BUF_BINDING_MULT 4 // a storage buffer binding size must be a multiple of 4
|
||||
|
||||
// For operations which process a row in parallel, this seems like a reasonable
|
||||
// default
|
||||
#define WEBGPU_ROW_SPLIT_WG_SIZE 64
|
||||
|
||||
// Track https://github.com/gpuweb/gpuweb/issues/5315 for fixes to
|
||||
// implementations so this can be removed, necessary only for get_rows right now
|
||||
#define WEBGPU_MAX_WG_SIZE 288
|
||||
|
||||
/* End Constants */
|
||||
|
||||
// This is a "fake" base pointer, since WebGPU buffers do not have pointers to
|
||||
|
|
@ -631,7 +623,7 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_global_context & ctx,
|
|||
size_t size) {
|
||||
std::vector<uint32_t> params = { (uint32_t) offset, (uint32_t) size, value };
|
||||
std::vector<wgpu::BindGroupEntry> entries = { ggml_webgpu_make_bind_group_entry(0, buf, 0, buf.GetSize()) };
|
||||
size_t bytes_per_wg = WEBGPU_MAX_WG_SIZE * ctx->capabilities.memset_bytes_per_thread;
|
||||
size_t bytes_per_wg = ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup * ctx->capabilities.memset_bytes_per_thread;
|
||||
uint32_t wg_x = CEIL_DIV(size + 3, bytes_per_wg);
|
||||
|
||||
ctx->queue.WriteBuffer(ctx->memset_params_buf, 0, params.data(), params.size() * sizeof(uint32_t));
|
||||
|
|
@ -1366,7 +1358,7 @@ static webgpu_encoded_op ggml_webgpu_get_rows(webgpu_context & ctx,
|
|||
shader_lib_ctx.src0 = src;
|
||||
shader_lib_ctx.src1 = nullptr;
|
||||
shader_lib_ctx.dst = dst;
|
||||
shader_lib_ctx.max_wg_size = WEBGPU_MAX_WG_SIZE;
|
||||
shader_lib_ctx.max_wg_size = ctx->global_ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup;
|
||||
|
||||
webgpu_pipeline pipeline = ctx->shader_lib->get_get_rows_pipeline(shader_lib_ctx);
|
||||
auto * decisions = static_cast<ggml_webgpu_generic_shader_decisions *>(pipeline.context.get());
|
||||
|
|
@ -3716,13 +3708,13 @@ static ggml_guid_t ggml_backend_webgpu_guid(void) {
|
|||
|
||||
static void ggml_webgpu_init_memset_pipeline(webgpu_global_context & ctx) {
|
||||
// we use the maximum workgroup size for the memset pipeline
|
||||
size_t max_threads = WEBGPU_MAX_WG_SIZE * ctx->capabilities.limits.maxComputeWorkgroupsPerDimension;
|
||||
size_t max_threads = ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup * ctx->capabilities.limits.maxComputeWorkgroupsPerDimension;
|
||||
// Size the bytes_per_thread so that the largest buffer size can be handled
|
||||
ctx->capabilities.memset_bytes_per_thread =
|
||||
CEIL_DIV(ctx->capabilities.limits.maxStorageBufferBindingSize, max_threads);
|
||||
std::vector<wgpu::ConstantEntry> constants(2);
|
||||
constants[0].key = "wg_size";
|
||||
constants[0].value = WEBGPU_MAX_WG_SIZE;
|
||||
constants[0].value = ctx->capabilities.limits.maxComputeInvocationsPerWorkgroup;
|
||||
constants[1].key = "bytes_per_thread";
|
||||
constants[1].value = ctx->capabilities.memset_bytes_per_thread;
|
||||
ctx->memset_pipeline = ggml_webgpu_create_pipeline(ctx->device, wgsl_memset, "memset", constants);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue