Not a working commit; still need to fix Vulkan shaders gen.

This commit is contained in:
Concedo 2025-10-05 11:32:50 +08:00
commit c83dde8a34
136 changed files with 394 additions and 323 deletions

View file

@ -1638,18 +1638,14 @@ static void add_rpc_devices(const std::string & servers) {
if (!rpc_reg) {
throw std::invalid_argument("failed to find RPC backend");
}
typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
if (!ggml_backend_rpc_add_device_fn) {
throw std::invalid_argument("failed to find RPC device add function");
typedef ggml_backend_reg_t (*ggml_backend_rpc_add_server_t)(const char * endpoint);
ggml_backend_rpc_add_server_t ggml_backend_rpc_add_server_fn = (ggml_backend_rpc_add_server_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server");
if (!ggml_backend_rpc_add_server_fn) {
throw std::invalid_argument("failed to find RPC add server function");
}
for (const auto & server : rpc_servers) {
ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
if (dev) {
ggml_backend_device_register(dev);
} else {
throw std::invalid_argument("failed to register RPC device");
}
auto reg = ggml_backend_rpc_add_server_fn(server.c_str());
ggml_backend_register(reg);
}
}

View file

@ -215,6 +215,8 @@ extern "C" {
// Backend registry
//
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
// Backend (reg) enumeration

View file

@ -7,26 +7,25 @@
extern "C" {
#endif
#define RPC_PROTO_MAJOR_VERSION 2
#define RPC_PROTO_MAJOR_VERSION 3
#define RPC_PROTO_MINOR_VERSION 0
#define RPC_PROTO_PATCH_VERSION 0
#define GGML_RPC_MAX_SERVERS 16
// backend API
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint);
GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint, uint32_t device);
GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend);
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint);
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint, uint32_t device);
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total);
GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total);
GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
const char * cache_dir,
size_t free_mem, size_t total_mem);
GGML_BACKEND_API void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir,
size_t n_threads, size_t n_devices,
ggml_backend_dev_t * devices, size_t * free_mem, size_t * total_mem);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void);
GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint);
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint);
#ifdef __cplusplus
}

View file

@ -209,9 +209,6 @@ extern "C" {
void * context;
};
// Internal backend registry API
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
// Add backend dynamic loading support to the backend
// Initialize the backend

View file

@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View file

@ -6,8 +6,8 @@
#extension GL_KHR_shader_subgroup_basic : enable
#endif
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
const uint num_threads = 256;

View file

@ -2,7 +2,7 @@
#extension GL_EXT_control_flow_attributes : require
#include "types.comp"
#include "types.glsl"
layout (push_constant) uniform parameter
{

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_control_flow_attributes : enable
#include "types.comp"
#include "types.glsl"
layout(constant_id = 0) const int BLOCK_SIZE = 1024;
layout(constant_id = 1) const int BLOCK_SIZE_LOG2 = 10;

View file

@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
#extension GL_EXT_control_flow_attributes : require

View file

@ -1,6 +1,6 @@
#version 450
#include "types.comp"
#include "types.glsl"
layout (push_constant) uniform parameter
{

View file

@ -11,7 +11,7 @@
# extension GL_KHR_shader_subgroup_shuffle : enable
#endif
#include "types.comp"
#include "types.glsl"
// shape notation: [dim(N), ..., dim(0)] -- stride(dim(j)) >= stride(dim(i)) if i > j
layout(binding = 0) readonly buffer A {

View file

@ -1,6 +1,6 @@
#version 450
#include "types.comp"
#include "types.glsl"
layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; // src0 - kernel: [K, Cout, Cin]
layout (binding = 1) readonly buffer B {B_TYPE data_b[];}; // src1 - input: [L, Cin]

View file

@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,8 +1,8 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "dequant_funcs.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
#include "dequant_funcs.glsl"
#if defined(DATA_A_IQ4_NL) || defined(DATA_A_MXFP4)
// 16 invocations needed for init_iq_shmem

View file

@ -1,7 +1,7 @@
#version 450
#include "rte.comp"
#include "types.comp"
#include "rte.glsl"
#include "types.glsl"
#if defined(SET_ROWS) && QUANT_K == 1
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
@ -14,7 +14,7 @@ const uint BLOCK_SIZE = 32;
layout (binding = 0) readonly buffer S {float data_s[];};
#if defined(SET_ROWS)
#include "generic_binary_head.comp"
#include "generic_binary_head.glsl"
layout (binding = 1) readonly buffer C {B_TYPE data_i[];};
layout (binding = 2) writeonly buffer Q {A_TYPE data_q[];};
@ -25,7 +25,7 @@ layout (binding = 2) writeonly buffer Q {A_TYPE data_q[];};
#endif
#else
#include "generic_unary_head.comp"
#include "generic_unary_head.glsl"
layout (binding = 1) writeonly buffer Q {A_TYPE data_q[];};
#endif

View file

@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_unary_head.comp"
#include "types.glsl"
#include "generic_unary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View file

@ -2,8 +2,8 @@
#extension GL_EXT_control_flow_attributes : enable
#include "types.comp"
#include "generic_head.comp"
#include "types.glsl"
#include "generic_head.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#endif
#include "types.comp"
#include "types.glsl"
#if defined(A_TYPE_PACKED16)
layout (binding = 0) readonly buffer A_PACKED16 {A_TYPE_PACKED16 data_a_packed16[];};

View file

@ -1,5 +1,5 @@
#include "types.comp"
#include "types.glsl"
layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ4_0 {
block_q4_0_packed16 block;

View file

@ -10,4 +10,4 @@ layout (push_constant) uniform parameter
uint nel;
} p;
#include "types.comp"
#include "types.glsl"

View file

@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#version 450
#include "dequant_head.comp"
#include "dequant_head.glsl"
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

View file

@ -10,7 +10,7 @@ layout (push_constant) uniform parameter
uint n_past;
} p;
#include "types.comp"
#include "types.glsl"
layout(local_size_x = 1, local_size_y = 512, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
const uint num_threads = 256;

View file

@ -1,8 +1,8 @@
#version 450
#include "rte.comp"
#include "generic_head.comp"
#include "types.comp"
#include "rte.glsl"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View file

@ -8,8 +8,8 @@
#extension GL_KHR_shader_subgroup_shuffle : enable
#include "types.comp"
#include "flash_attn_base.comp"
#include "types.glsl"
#include "flash_attn_base.glsl"
const uint32_t HSK_per_thread = HSK / D_split;
const uint32_t HSV_per_thread = HSV / D_split;

View file

@ -10,8 +10,8 @@
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#include "types.comp"
#include "flash_attn_base.comp"
#include "types.glsl"
#include "flash_attn_base.glsl"
const uint32_t HSK_per_thread = HSK / D_split;
const uint32_t HSV_per_thread = HSV / D_split;

View file

@ -16,9 +16,9 @@
#extension GL_KHR_shader_subgroup_vote : enable
#extension GL_EXT_null_initializer : enable
#include "types.comp"
#include "dequant_funcs_cm2.comp"
#include "flash_attn_base.comp"
#include "types.glsl"
#include "dequant_funcs_cm2.glsl"
#include "flash_attn_base.glsl"
layout (binding = 0) readonly buffer Q {uint8_t data_q[];};
layout (binding = 1) readonly buffer K {uint8_t data_k[];};

View file

@ -1,6 +1,6 @@
#version 450
#include "glu_head.comp"
#include "glu_head.glsl"
const float GELU_COEF_A = 0.044715f;
const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
@ -10,4 +10,4 @@ float op(float a, float b) {
return 0.5f*a*(2.0f - 2.0f / (exp(2 * val) + 1)) * b;
}
#include "glu_main.comp"
#include "glu_main.glsl"

View file

@ -1,6 +1,6 @@
#version 450
#include "glu_head.comp"
#include "glu_head.glsl"
// based on Abramowitz and Stegun formula 7.1.26 or similar Hastings' approximation
// ref: https://www.johndcook.com/blog/python_erf/
@ -24,4 +24,4 @@ float op(float a, float b) {
return 0.5f * a * (1.0f + erf_approx) * b;
}
#include "glu_main.comp"
#include "glu_main.glsl"

View file

@ -1,6 +1,6 @@
#version 450
#include "glu_head.comp"
#include "glu_head.glsl"
const float GELU_QUICK_COEF = -1.702f;
@ -8,4 +8,4 @@ float op(float a, float b) {
return a * (1.0f / (1.0f + exp(GELU_QUICK_COEF * a))) * b;
}
#include "glu_main.comp"
#include "glu_main.glsl"

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View file

@ -1,8 +1,8 @@
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_control_flow_attributes : require
#include "rte.comp"
#include "utils.comp"
#include "rte.glsl"
#include "utils.glsl"
layout (push_constant) uniform parameter
{

View file

@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View file

@ -2,9 +2,9 @@
#extension GL_EXT_control_flow_attributes : enable
#include "types.comp"
#include "generic_binary_head.comp"
#include "dequant_funcs.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
#include "dequant_funcs.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,6 +1,6 @@
#extension GL_EXT_shader_16bit_storage : require
#include "rte.comp"
#include "rte.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable
#define BLOCK_SIZE 512

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View file

@ -3,9 +3,8 @@
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_control_flow_attributes : require
#include "rte.comp"
#include "types.comp"
#include "rte.glsl"
#include "types.glsl"
layout (push_constant) uniform parameter
{

View file

@ -4,9 +4,8 @@
#extension GL_EXT_control_flow_attributes : require
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "rte.comp"
#include "types.comp"
#include "rte.glsl"
#include "types.glsl"
layout (push_constant) uniform parameter
{

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable
#define BLOCK_SIZE 512

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View file

@ -1,7 +1,7 @@
#version 450
#include "types.comp"
#include "generic_binary_head.comp"
#include "types.glsl"
#include "generic_binary_head.glsl"
const uint num_threads = 256;

View file

@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -11,7 +11,7 @@
#define EXPERT_COUNT 8
#endif
#include "types.comp"
#include "types.glsl"
#ifndef MMQ
layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
@ -32,7 +32,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
layout (binding = 3) readonly buffer IDS {int data_ids[];};
#endif
#include "dequant_funcs.comp"
#include "dequant_funcs.glsl"
layout (push_constant) uniform parameter
{

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -1,7 +1,7 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : require
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

View file

@ -6,13 +6,13 @@
#define MMQ
#define B_TYPE block_q8_1_x4
#include "mul_mat_vec_base.comp"
#include "mul_mat_vec_base.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
#define K_PER_ITER 8
#include "mul_mmq_funcs.comp"
#include "mul_mmq_funcs.glsl"
uint a_offset, b_offset, d_offset;

View file

@ -28,7 +28,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#endif
#include "types.comp"
#include "types.glsl"
#ifndef LOAD_VEC_A
#define LOAD_VEC_A 1
@ -195,7 +195,7 @@ void load_row_ids(uint expert_idx, bool nei0_is_pow2, uint ic) {
shared ACC_TYPE coopmat_stage[TM * TN * NUM_WARPS];
#endif
#include "mul_mm_funcs.comp"
#include "mul_mm_funcs.glsl"
void main() {
#ifdef NEEDS_INIT_IQ_SHMEM

View file

@ -18,8 +18,8 @@
#extension GL_EXT_bfloat16 : enable
#endif
#include "types.comp"
#include "utils.comp"
#include "types.glsl"
#include "utils.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
@ -71,7 +71,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
#if QUANT_K > 1
#define DECODEFUNCA , dequantFuncA
#include "dequant_funcs_cm2.comp"
#include "dequant_funcs_cm2.glsl"
#else
#define DECODEFUNCA

View file

@ -20,7 +20,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#endif
#include "types.comp"
#include "types.glsl"
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
@ -110,7 +110,7 @@ shared u16vec2 row_ids[4096];
shared ACC_TYPE coopmat_stage[TM * TN * NUM_WARPS];
#endif
#include "mul_mmq_funcs.comp"
#include "mul_mmq_funcs.glsl"
void main() {
#ifdef NEEDS_INIT_IQ_SHMEM

View file

@ -2,7 +2,7 @@
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#include "types.comp"
#include "types.glsl"
// Each iqs value maps to a 32-bit integer

View file

@ -8,9 +8,9 @@
#extension GL_KHR_shader_subgroup_basic : enable
#endif
#include "rte.comp"
#include "types.comp"
#include "utils.comp"
#include "rte.glsl"
#include "types.glsl"
#include "utils.glsl"
layout (push_constant) uniform parameter2
{

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable
#define BLOCK_SIZE 512

View file

@ -1,7 +1,7 @@
#version 450
#include "generic_head.comp"
#include "types.comp"
#include "generic_head.glsl"
#include "types.glsl"
#extension GL_EXT_control_flow_attributes : enable

View file

@ -1,6 +1,6 @@
#version 450
#include "generic_head.comp"
#include "generic_head.glsl"
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

Some files were not shown because too many files have changed in this diff. Show more