From e29acf74fea996014380d59d31aa504ae8964258 Mon Sep 17 00:00:00 2001 From: Acly Date: Sat, 4 Oct 2025 11:42:56 +0200 Subject: [PATCH 1/4] vulkan : incremental shader builds (#16341) * vulkan (DRAFT): split shader generation by GLSL source file, to improve incremental build times * support dep-files so shaders are recompiled if their included files change * rename shader files which are used as "headers" to use .glsl extension * move glslc extension detection shaders to separate folders * the above is to prevent them from getting glob'd with the actual compute shaders that need to be compiled * vulkan : only write embedded shader .hpp/.cpp when they change * avoid recompiling ggml-vulkan.cpp when editing shaders * pass single --source argument instead of --input-dir & --filter to shader gen * check for source file match earlier * fix hang in vulkan-shaders-gen when there are compilation errors * early out did not decrement compile_count * clean up * fix glslc integer dot product test * unconditionally write the embedded shader cpp output * replace output filepath in generated dep-files to match output in CMakeLists --------- Co-authored-by: Jeff Bolz --- ggml/src/ggml-vulkan/CMakeLists.txt | 45 ++- ggml/src/ggml-vulkan/vulkan-shaders/acc.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/add.comp | 4 +- .../ggml-vulkan/vulkan-shaders/add_id.comp | 2 +- .../ggml-vulkan/vulkan-shaders/argmax.comp | 4 +- .../ggml-vulkan/vulkan-shaders/argsort.comp | 2 +- .../src/ggml-vulkan/vulkan-shaders/clamp.comp | 4 +- .../ggml-vulkan/vulkan-shaders/concat.comp | 4 +- .../vulkan-shaders/contig_copy.comp | 4 +- .../ggml-vulkan/vulkan-shaders/conv2d_dw.comp | 2 +- .../ggml-vulkan/vulkan-shaders/conv2d_mm.comp | 2 +- .../vulkan-shaders/conv_transpose_1d.comp | 2 +- ggml/src/ggml-vulkan/vulkan-shaders/copy.comp | 4 +- .../vulkan-shaders/copy_from_quant.comp | 6 +- .../vulkan-shaders/copy_to_quant.comp | 8 +- ggml/src/ggml-vulkan/vulkan-shaders/cos.comp | 4 +- .../vulkan-shaders/count_equal.comp | 4 +- .../vulkan-shaders/dequant_f32.comp | 2 +- ...{dequant_funcs.comp => dequant_funcs.glsl} | 2 +- ..._funcs_cm2.comp => dequant_funcs_cm2.glsl} | 2 +- .../{dequant_head.comp => dequant_head.glsl} | 2 +- .../vulkan-shaders/dequant_iq1_m.comp | 2 +- .../vulkan-shaders/dequant_iq1_s.comp | 2 +- .../vulkan-shaders/dequant_iq2_s.comp | 2 +- .../vulkan-shaders/dequant_iq2_xs.comp | 2 +- .../vulkan-shaders/dequant_iq2_xxs.comp | 2 +- .../vulkan-shaders/dequant_iq3_s.comp | 2 +- .../vulkan-shaders/dequant_iq3_xxs.comp | 2 +- .../vulkan-shaders/dequant_iq4_nl.comp | 2 +- .../vulkan-shaders/dequant_iq4_xs.comp | 2 +- .../vulkan-shaders/dequant_mxfp4.comp | 2 +- .../vulkan-shaders/dequant_q2_k.comp | 2 +- .../vulkan-shaders/dequant_q3_k.comp | 2 +- .../vulkan-shaders/dequant_q4_0.comp | 2 +- .../vulkan-shaders/dequant_q4_1.comp | 2 +- .../vulkan-shaders/dequant_q4_k.comp | 2 +- .../vulkan-shaders/dequant_q5_0.comp | 2 +- .../vulkan-shaders/dequant_q5_1.comp | 2 +- .../vulkan-shaders/dequant_q5_k.comp | 2 +- .../vulkan-shaders/dequant_q6_k.comp | 2 +- .../vulkan-shaders/dequant_q8_0.comp | 2 +- .../vulkan-shaders/diag_mask_inf.comp | 2 +- ggml/src/ggml-vulkan/vulkan-shaders/div.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/exp.comp | 6 +- .../bfloat16.comp} | 0 .../coopmat.comp} | 0 .../coopmat2.comp} | 0 .../integer_dot.comp} | 0 .../vulkan-shaders/flash_attn.comp | 4 +- ...sh_attn_base.comp => flash_attn_base.glsl} | 0 .../vulkan-shaders/flash_attn_cm1.comp | 4 +- .../vulkan-shaders/flash_attn_cm2.comp | 6 +- .../src/ggml-vulkan/vulkan-shaders/geglu.comp | 4 +- .../ggml-vulkan/vulkan-shaders/geglu_erf.comp | 4 +- .../vulkan-shaders/geglu_quick.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp | 4 +- .../ggml-vulkan/vulkan-shaders/gelu_erf.comp | 4 +- .../vulkan-shaders/gelu_quick.comp | 4 +- ...ary_head.comp => generic_binary_head.glsl} | 4 +- .../{generic_head.comp => generic_head.glsl} | 0 ...nary_head.comp => generic_unary_head.glsl} | 0 .../ggml-vulkan/vulkan-shaders/get_rows.comp | 4 +- .../vulkan-shaders/get_rows_quant.comp | 6 +- .../{glu_head.comp => glu_head.glsl} | 2 +- .../{glu_main.comp => glu_main.glsl} | 0 .../vulkan-shaders/group_norm.comp | 4 +- .../vulkan-shaders/hardsigmoid.comp | 4 +- .../ggml-vulkan/vulkan-shaders/hardswish.comp | 4 +- .../ggml-vulkan/vulkan-shaders/im2col.comp | 5 +- .../ggml-vulkan/vulkan-shaders/im2col_3d.comp | 5 +- .../ggml-vulkan/vulkan-shaders/l2_norm.comp | 4 +- .../vulkan-shaders/leaky_relu.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/mul.comp | 4 +- .../vulkan-shaders/mul_mat_vec.comp | 2 +- ...at_vec_base.comp => mul_mat_vec_base.glsl} | 4 +- .../vulkan-shaders/mul_mat_vec_iq1_m.comp | 2 +- .../vulkan-shaders/mul_mat_vec_iq1_s.comp | 2 +- .../vulkan-shaders/mul_mat_vec_iq2_s.comp | 2 +- .../vulkan-shaders/mul_mat_vec_iq2_xs.comp | 2 +- .../vulkan-shaders/mul_mat_vec_iq2_xxs.comp | 2 +- .../vulkan-shaders/mul_mat_vec_iq3_s.comp | 2 +- .../vulkan-shaders/mul_mat_vec_iq3_xxs.comp | 2 +- .../vulkan-shaders/mul_mat_vec_q2_k.comp | 2 +- .../vulkan-shaders/mul_mat_vec_q3_k.comp | 2 +- .../vulkan-shaders/mul_mat_vec_q4_k.comp | 2 +- .../vulkan-shaders/mul_mat_vec_q5_k.comp | 2 +- .../vulkan-shaders/mul_mat_vec_q6_k.comp | 2 +- .../vulkan-shaders/mul_mat_vecq.comp | 4 +- .../ggml-vulkan/vulkan-shaders/mul_mm.comp | 4 +- .../vulkan-shaders/mul_mm_cm2.comp | 6 +- .../{mul_mm_funcs.comp => mul_mm_funcs.glsl} | 0 .../ggml-vulkan/vulkan-shaders/mul_mmq.comp | 4 +- ...{mul_mmq_funcs.comp => mul_mmq_funcs.glsl} | 2 +- .../ggml-vulkan/vulkan-shaders/multi_add.comp | 6 +- ggml/src/ggml-vulkan/vulkan-shaders/norm.comp | 4 +- .../vulkan-shaders/opt_step_adamw.comp | 4 +- .../vulkan-shaders/opt_step_sgd.comp | 2 +- ggml/src/ggml-vulkan/vulkan-shaders/pad.comp | 2 +- .../ggml-vulkan/vulkan-shaders/pool2d.comp | 2 +- .../vulkan-shaders/quantize_q8_1.comp | 2 +- .../src/ggml-vulkan/vulkan-shaders/reglu.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/relu.comp | 4 +- .../ggml-vulkan/vulkan-shaders/repeat.comp | 4 +- .../vulkan-shaders/repeat_back.comp | 4 +- .../ggml-vulkan/vulkan-shaders/rms_norm.comp | 4 +- .../vulkan-shaders/rms_norm_back.comp | 4 +- .../vulkan-shaders/rms_norm_partials.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/roll.comp | 4 +- .../{rope_head.comp => rope_head.glsl} | 4 +- .../vulkan-shaders/rope_multi.comp | 2 +- .../ggml-vulkan/vulkan-shaders/rope_neox.comp | 2 +- .../ggml-vulkan/vulkan-shaders/rope_norm.comp | 2 +- .../vulkan-shaders/rope_vision.comp | 2 +- .../vulkan-shaders/{rte.comp => rte.glsl} | 0 .../src/ggml-vulkan/vulkan-shaders/scale.comp | 4 +- .../ggml-vulkan/vulkan-shaders/sigmoid.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/silu.comp | 4 +- .../ggml-vulkan/vulkan-shaders/silu_back.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/sin.comp | 4 +- .../ggml-vulkan/vulkan-shaders/soft_max.comp | 2 +- .../vulkan-shaders/soft_max_back.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp | 4 +- .../ggml-vulkan/vulkan-shaders/square.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/sub.comp | 4 +- .../ggml-vulkan/vulkan-shaders/sum_rows.comp | 2 +- .../ggml-vulkan/vulkan-shaders/swiglu.comp | 4 +- .../vulkan-shaders/swiglu_oai.comp | 4 +- ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp | 4 +- .../vulkan-shaders/timestep_embedding.comp | 2 +- .../vulkan-shaders/{types.comp => types.glsl} | 0 .../ggml-vulkan/vulkan-shaders/upscale.comp | 2 +- .../vulkan-shaders/{utils.comp => utils.glsl} | 0 .../vulkan-shaders/vulkan-shaders-gen.cpp | 294 +++++++++++------- 133 files changed, 404 insertions(+), 315 deletions(-) rename ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs.comp => dequant_funcs.glsl} (99%) rename ggml/src/ggml-vulkan/vulkan-shaders/{dequant_funcs_cm2.comp => dequant_funcs_cm2.glsl} (99%) rename ggml/src/ggml-vulkan/vulkan-shaders/{dequant_head.comp => dequant_head.glsl} (91%) rename ggml/src/ggml-vulkan/vulkan-shaders/{test_bfloat16_support.comp => feature-tests/bfloat16.comp} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat_support.comp => feature-tests/coopmat.comp} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{test_coopmat2_support.comp => feature-tests/coopmat2.comp} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{test_integer_dot_support.comp => feature-tests/integer_dot.comp} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{flash_attn_base.comp => flash_attn_base.glsl} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{generic_binary_head.comp => generic_binary_head.glsl} (97%) rename ggml/src/ggml-vulkan/vulkan-shaders/{generic_head.comp => generic_head.glsl} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{generic_unary_head.comp => generic_unary_head.glsl} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{glu_head.comp => glu_head.glsl} (95%) rename ggml/src/ggml-vulkan/vulkan-shaders/{glu_main.comp => glu_main.glsl} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{mul_mat_vec_base.comp => mul_mat_vec_base.glsl} (99%) rename ggml/src/ggml-vulkan/vulkan-shaders/{mul_mm_funcs.comp => mul_mm_funcs.glsl} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{mul_mmq_funcs.comp => mul_mmq_funcs.glsl} (99%) rename ggml/src/ggml-vulkan/vulkan-shaders/{rope_head.comp => rope_head.glsl} (97%) rename ggml/src/ggml-vulkan/vulkan-shaders/{rte.comp => rte.glsl} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{types.comp => types.glsl} (100%) rename ggml/src/ggml-vulkan/vulkan-shaders/{utils.comp => utils.glsl} (100%) diff --git a/ggml/src/ggml-vulkan/CMakeLists.txt b/ggml/src/ggml-vulkan/CMakeLists.txt index b97e7bf99..83a83887b 100644 --- a/ggml/src/ggml-vulkan/CMakeLists.txt +++ b/ggml/src/ggml-vulkan/CMakeLists.txt @@ -1,5 +1,6 @@ cmake_minimum_required(VERSION 3.19) cmake_policy(SET CMP0114 NEW) +cmake_policy(SET CMP0116 NEW) find_package(Vulkan COMPONENTS glslc REQUIRED) @@ -54,25 +55,25 @@ if (Vulkan_FOUND) # Test all shader extensions test_shader_extension_support( "GL_KHR_cooperative_matrix" - "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat_support.comp" + "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/coopmat.comp" "GGML_VULKAN_COOPMAT_GLSLC_SUPPORT" ) test_shader_extension_support( "GL_NV_cooperative_matrix2" - "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat2_support.comp" + "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/coopmat2.comp" "GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT" ) test_shader_extension_support( "GL_EXT_integer_dot_product" - "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_integer_dot_support.comp" + "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/integer_dot.comp" "GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT" ) test_shader_extension_support( "GL_EXT_bfloat16" - "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_bfloat16_support.comp" + "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/feature-tests/bfloat16.comp" "GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT" ) @@ -160,7 +161,6 @@ if (Vulkan_FOUND) set (_ggml_vk_genshaders_dir "${CMAKE_BINARY_DIR}/$") set (_ggml_vk_genshaders_cmd "${_ggml_vk_genshaders_dir}/vulkan-shaders-gen${_ggml_vk_host_suffix}") set (_ggml_vk_header "${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.hpp") - set (_ggml_vk_source "${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.cpp") set (_ggml_vk_input_dir "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders") set (_ggml_vk_output_dir "${CMAKE_CURRENT_BINARY_DIR}/vulkan-shaders.spv") @@ -176,24 +176,35 @@ if (Vulkan_FOUND) add_custom_command( OUTPUT ${_ggml_vk_header} - ${_ggml_vk_source} - COMMAND ${_ggml_vk_genshaders_cmd} - --glslc ${Vulkan_GLSLC_EXECUTABLE} - --input-dir ${_ggml_vk_input_dir} --output-dir ${_ggml_vk_output_dir} --target-hpp ${_ggml_vk_header} - --target-cpp ${_ggml_vk_source} - --no-clean - - DEPENDS ${_ggml_vk_shader_files} - ${_ggml_vk_shaders_gen_sources} + DEPENDS ${_ggml_vk_shaders_gen_sources} vulkan-shaders-gen - - COMMENT "Generate vulkan shaders" + COMMENT "Generate vulkan shaders header" ) + target_sources(ggml-vulkan PRIVATE ${_ggml_vk_header}) - target_sources(ggml-vulkan PRIVATE ${_ggml_vk_source} ${_ggml_vk_header}) + foreach (file_full ${_ggml_vk_shader_files}) + get_filename_component(file ${file_full} NAME) + set (_ggml_vk_target_cpp "${CMAKE_CURRENT_BINARY_DIR}/${file}.cpp") + + add_custom_command( + OUTPUT ${_ggml_vk_target_cpp} + DEPFILE ${_ggml_vk_target_cpp}.d + COMMAND ${_ggml_vk_genshaders_cmd} + --glslc ${Vulkan_GLSLC_EXECUTABLE} + --source ${file_full} + --output-dir ${_ggml_vk_output_dir} + --target-hpp ${_ggml_vk_header} + --target-cpp ${_ggml_vk_target_cpp} + DEPENDS ${file_full} + ${_ggml_vk_shaders_gen_sources} + vulkan-shaders-gen + COMMENT "Generate vulkan shaders for ${file}" + ) + target_sources(ggml-vulkan PRIVATE ${_ggml_vk_target_cpp}) + endforeach() else() message(WARNING "Vulkan not found") diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp b/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp index d896f1ef0..5084a70ed 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_binary_head.comp" +#include "types.glsl" +#include "generic_binary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/add.comp b/ggml/src/ggml-vulkan/vulkan-shaders/add.comp index 00cf2dd62..3bcfe6908 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/add.comp @@ -6,8 +6,8 @@ #extension GL_KHR_shader_subgroup_basic : enable #endif -#include "types.comp" -#include "generic_binary_head.comp" +#include "types.glsl" +#include "generic_binary_head.glsl" const uint num_threads = 256; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp b/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp index 3ae8f0116..495249d5f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/add_id.comp @@ -2,7 +2,7 @@ #extension GL_EXT_control_flow_attributes : require -#include "types.comp" +#include "types.glsl" layout (push_constant) uniform parameter { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp b/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp index a1d4c240d..7c1287767 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp b/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp index dc53a401e..c81b84452 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_control_flow_attributes : enable -#include "types.comp" +#include "types.glsl" layout(constant_id = 0) const int BLOCK_SIZE = 1024; layout(constant_id = 1) const int BLOCK_SIZE_LOG2 = 10; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp b/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp index 1e5cb8dae..653431895 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp b/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp index 9ee2f1fae..e40469838 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_binary_head.comp" +#include "types.glsl" +#include "generic_binary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp b/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp index 6567a8c54..ca1a3ac25 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" #extension GL_EXT_control_flow_attributes : require diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp b/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp index 938c74da5..70a301488 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp @@ -1,6 +1,6 @@ #version 450 -#include "types.comp" +#include "types.glsl" layout (push_constant) uniform parameter { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp index 44a64ddc8..0367e80bb 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp @@ -11,7 +11,7 @@ # extension GL_KHR_shader_subgroup_shuffle : enable #endif -#include "types.comp" +#include "types.glsl" // shape notation: [dim(N), ..., dim(0)] -- stride(dim(j)) >= stride(dim(i)) if i > j layout(binding = 0) readonly buffer A { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp b/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp index b17b4e83e..5217e18bd 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp @@ -1,6 +1,6 @@ #version 450 -#include "types.comp" +#include "types.glsl" layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; // src0 - kernel: [K, Cout, Cin] layout (binding = 1) readonly buffer B {B_TYPE data_b[];}; // src1 - input: [L, Cin] diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp b/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp index f476a2e3d..9f8bfd3c1 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp b/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp index 978d43003..06df50952 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp @@ -1,8 +1,8 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" -#include "dequant_funcs.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" +#include "dequant_funcs.glsl" #if defined(DATA_A_IQ4_NL) || defined(DATA_A_MXFP4) // 16 invocations needed for init_iq_shmem diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp b/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp index bc2e1f2df..b8c40eec1 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp @@ -1,7 +1,7 @@ #version 450 -#include "rte.comp" -#include "types.comp" +#include "rte.glsl" +#include "types.glsl" #if defined(SET_ROWS) && QUANT_K == 1 layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; @@ -14,7 +14,7 @@ const uint BLOCK_SIZE = 32; layout (binding = 0) readonly buffer S {float data_s[];}; #if defined(SET_ROWS) -#include "generic_binary_head.comp" +#include "generic_binary_head.glsl" layout (binding = 1) readonly buffer C {B_TYPE data_i[];}; layout (binding = 2) writeonly buffer Q {A_TYPE data_q[];}; @@ -25,7 +25,7 @@ layout (binding = 2) writeonly buffer Q {A_TYPE data_q[];}; #endif #else -#include "generic_unary_head.comp" +#include "generic_unary_head.glsl" layout (binding = 1) writeonly buffer Q {A_TYPE data_q[];}; #endif diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp b/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp index 0b8d02f58..db6865db9 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp b/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp index d9345497c..e75df6675 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp @@ -2,8 +2,8 @@ #extension GL_EXT_control_flow_attributes : enable -#include "types.comp" -#include "generic_head.comp" +#include "types.glsl" +#include "generic_head.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp index a4d3fca55..765afffa8 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl similarity index 99% rename from ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl index 73fef4fa6..0d98f5a9d 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.glsl @@ -2,7 +2,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #endif -#include "types.comp" +#include "types.glsl" #if defined(A_TYPE_PACKED16) layout (binding = 0) readonly buffer A_PACKED16 {A_TYPE_PACKED16 data_a_packed16[];}; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl similarity index 99% rename from ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl index 706540fd8..6a5bb4574 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.glsl @@ -1,5 +1,5 @@ -#include "types.comp" +#include "types.glsl" layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufQ4_0 { block_q4_0_packed16 block; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl similarity index 91% rename from ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl index 8d806435b..addceafad 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.glsl @@ -10,4 +10,4 @@ layout (push_constant) uniform parameter uint nel; } p; -#include "types.comp" +#include "types.glsl" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp index b604c1881..637c95fa3 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp @@ -2,7 +2,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp index fd1e4e30d..d1cbc5e9d 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp index 127c7b642..78490162c 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp index a08331c40..9b8ce0a7f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp index 0ae9acd02..aacf07d0f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp index e4f42be94..f2c20b1d2 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp index 19c7fdeef..671c1f4a0 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp index 46d9ad15e..8f7833eab 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp index f930852a4..a31369977 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp index ee496e9d5..ffba5a77d 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_mxfp4.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp index d4e4e6bae..58dc2e5df 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp index 3661f771c..0c90be8b4 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp index 408185327..b92b29213 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp index 2f27eee68..6b63cbe58 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp index 1370db365..8b7be557e 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp index b20b80529..f1b0bac87 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp index dc59fe3b7..c495b31f1 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp index 3f3b839e1..6bc04670f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp index 9cf34256e..c8d6fcb49 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp index bd1344a88..10844ddf7 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp @@ -1,6 +1,6 @@ #version 450 -#include "dequant_head.comp" +#include "dequant_head.glsl" layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp b/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp index 26d8bc22a..9cef8a8ec 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp @@ -10,7 +10,7 @@ layout (push_constant) uniform parameter uint n_past; } p; -#include "types.comp" +#include "types.glsl" layout(local_size_x = 1, local_size_y = 512, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/div.comp b/ggml/src/ggml-vulkan/vulkan-shaders/div.comp index 9fb69c6c1..572472f8a 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/div.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_binary_head.comp" +#include "types.glsl" +#include "generic_binary_head.glsl" const uint num_threads = 256; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp b/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp index a3941372a..b69d4ddb0 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/exp.comp @@ -1,8 +1,8 @@ #version 450 -#include "rte.comp" -#include "generic_head.comp" -#include "types.comp" +#include "rte.glsl" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp b/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/bfloat16.comp diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp b/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat.comp diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp b/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/coopmat2.comp diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp b/ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/feature-tests/integer_dot.comp diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp index e42475026..62acbf107 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp @@ -8,8 +8,8 @@ #extension GL_KHR_shader_subgroup_shuffle : enable -#include "types.comp" -#include "flash_attn_base.comp" +#include "types.glsl" +#include "flash_attn_base.glsl" const uint32_t HSK_per_thread = HSK / D_split; const uint32_t HSV_per_thread = HSV / D_split; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.glsl diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp index 0507df2d8..2066a05b3 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp @@ -10,8 +10,8 @@ #extension GL_KHR_memory_scope_semantics : enable #extension GL_KHR_cooperative_matrix : enable -#include "types.comp" -#include "flash_attn_base.comp" +#include "types.glsl" +#include "flash_attn_base.glsl" const uint32_t HSK_per_thread = HSK / D_split; const uint32_t HSV_per_thread = HSV / D_split; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp index a65553a48..910da1ab0 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp @@ -16,9 +16,9 @@ #extension GL_KHR_shader_subgroup_vote : enable #extension GL_EXT_null_initializer : enable -#include "types.comp" -#include "dequant_funcs_cm2.comp" -#include "flash_attn_base.comp" +#include "types.glsl" +#include "dequant_funcs_cm2.glsl" +#include "flash_attn_base.glsl" layout (binding = 0) readonly buffer Q {uint8_t data_q[];}; layout (binding = 1) readonly buffer K {uint8_t data_k[];}; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp index f4268ed24..e017b5036 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp @@ -1,6 +1,6 @@ #version 450 -#include "glu_head.comp" +#include "glu_head.glsl" const float GELU_COEF_A = 0.044715f; const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f; @@ -10,4 +10,4 @@ float op(float a, float b) { return 0.5f*a*(2.0f - 2.0f / (exp(2 * val) + 1)) * b; } -#include "glu_main.comp" +#include "glu_main.glsl" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp b/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp index cbd4cb36b..759a1848f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp @@ -1,6 +1,6 @@ #version 450 -#include "glu_head.comp" +#include "glu_head.glsl" // based on Abramowitz and Stegun formula 7.1.26 or similar Hastings' approximation // ref: https://www.johndcook.com/blog/python_erf/ @@ -24,4 +24,4 @@ float op(float a, float b) { return 0.5f * a * (1.0f + erf_approx) * b; } -#include "glu_main.comp" +#include "glu_main.glsl" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp b/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp index 3a2a6897b..c4032ab21 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp @@ -1,6 +1,6 @@ #version 450 -#include "glu_head.comp" +#include "glu_head.glsl" const float GELU_QUICK_COEF = -1.702f; @@ -8,4 +8,4 @@ float op(float a, float b) { return a * (1.0f / (1.0f + exp(GELU_QUICK_COEF * a))) * b; } -#include "glu_main.comp" +#include "glu_main.glsl" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp index 4cc7a68ca..a95c2525c 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp b/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp index 5fd5a5e70..58375aba0 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp b/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp index e6e6fcfd2..bfdfe2182 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp b/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl similarity index 97% rename from ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl index 750e78575..99595fc68 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.glsl @@ -1,8 +1,8 @@ #extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_control_flow_attributes : require -#include "rte.comp" -#include "utils.comp" +#include "rte.glsl" +#include "utils.glsl" layout (push_constant) uniform parameter { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp b/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/generic_head.glsl diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp b/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.glsl diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp b/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp index 7ef75cd7a..76d83041c 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_binary_head.comp" +#include "types.glsl" +#include "generic_binary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp b/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp index 339f905fc..9dba437ed 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp @@ -2,9 +2,9 @@ #extension GL_EXT_control_flow_attributes : enable -#include "types.comp" -#include "generic_binary_head.comp" -#include "dequant_funcs.comp" +#include "types.glsl" +#include "generic_binary_head.glsl" +#include "dequant_funcs.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp b/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl similarity index 95% rename from ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl index 51d70869d..216898934 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.glsl @@ -1,6 +1,6 @@ #extension GL_EXT_shader_16bit_storage : require -#include "rte.comp" +#include "rte.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp b/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/glu_main.glsl diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp index b6a0d5645..bdf97dbb5 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable #define BLOCK_SIZE 512 diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp b/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp index 1da252cc6..b4dbdf314 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/hardsigmoid.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp b/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp index 3afc58827..1ec315915 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/hardswish.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp b/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp index f0f19a019..1827d647a 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp @@ -3,9 +3,8 @@ #extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_control_flow_attributes : require -#include "rte.comp" - -#include "types.comp" +#include "rte.glsl" +#include "types.glsl" layout (push_constant) uniform parameter { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp b/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp index 9faa636ac..4bf8b4ca0 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/im2col_3d.comp @@ -4,9 +4,8 @@ #extension GL_EXT_control_flow_attributes : require #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "rte.comp" - -#include "types.comp" +#include "rte.glsl" +#include "types.glsl" layout (push_constant) uniform parameter { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp index deba8c398..83ef2f879 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable #define BLOCK_SIZE 512 diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp index d90a99aea..b281e855c 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp index 43de19df8..02ef1eace 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_binary_head.comp" +#include "types.glsl" +#include "generic_binary_head.glsl" const uint num_threads = 256; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp index bb429dd59..9a03925cf 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp @@ -2,7 +2,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl similarity index 99% rename from ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl index f761391ea..450dee040 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.glsl @@ -11,7 +11,7 @@ #define EXPERT_COUNT 8 #endif -#include "types.comp" +#include "types.glsl" #ifndef MMQ layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; @@ -32,7 +32,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; layout (binding = 3) readonly buffer IDS {int data_ids[];}; #endif -#include "dequant_funcs.comp" +#include "dequant_funcs.glsl" layout (push_constant) uniform parameter { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp index e4acbd4f9..4cb292380 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp index 309da0991..0b74b3321 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp index 8d01536fa..e424af12c 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp index c49604324..0cd906dbb 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp index 94d4b92e1..71bd72d17 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp index f021e4047..a4b9ab1f9 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp index 3fe9dc3a4..40849c691 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp index 423ceb8a3..03ed25d3b 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp index e91724a28..528f224d8 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp index f9cde0648..21d07d2e5 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp @@ -2,7 +2,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp index 6c84ef3cd..9e46c89a1 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp @@ -2,7 +2,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp index d53d9ee0a..d7a7f6426 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp @@ -2,7 +2,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int32 : require -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp index 8fb314fa0..64293f6ec 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vecq.comp @@ -6,13 +6,13 @@ #define MMQ #define B_TYPE block_q8_1_x4 -#include "mul_mat_vec_base.comp" +#include "mul_mat_vec_base.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; #define K_PER_ITER 8 -#include "mul_mmq_funcs.comp" +#include "mul_mmq_funcs.glsl" uint a_offset, b_offset, d_offset; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp index 3cb24412d..85400ac5f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp @@ -28,7 +28,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require #endif -#include "types.comp" +#include "types.glsl" #ifndef LOAD_VEC_A #define LOAD_VEC_A 1 @@ -195,7 +195,7 @@ void load_row_ids(uint expert_idx, bool nei0_is_pow2, uint ic) { shared ACC_TYPE coopmat_stage[TM * TN * NUM_WARPS]; #endif -#include "mul_mm_funcs.comp" +#include "mul_mm_funcs.glsl" void main() { #ifdef NEEDS_INIT_IQ_SHMEM diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp index 0e3065e01..2e04baa44 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp @@ -18,8 +18,8 @@ #extension GL_EXT_bfloat16 : enable #endif -#include "types.comp" -#include "utils.comp" +#include "types.glsl" +#include "utils.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; @@ -71,7 +71,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; #if QUANT_K > 1 #define DECODEFUNCA , dequantFuncA -#include "dequant_funcs_cm2.comp" +#include "dequant_funcs_cm2.glsl" #else #define DECODEFUNCA diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_funcs.glsl diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp index f36add62a..b5d761c0b 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp @@ -20,7 +20,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require #endif -#include "types.comp" +#include "types.glsl" layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; @@ -110,7 +110,7 @@ shared u16vec2 row_ids[4096]; shared ACC_TYPE coopmat_stage[TM * TN * NUM_WARPS]; #endif -#include "mul_mmq_funcs.comp" +#include "mul_mmq_funcs.glsl" void main() { #ifdef NEEDS_INIT_IQ_SHMEM diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl similarity index 99% rename from ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl index cdfb230f4..fe71eb131 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.glsl @@ -2,7 +2,7 @@ #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require -#include "types.comp" +#include "types.glsl" // Each iqs value maps to a 32-bit integer diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp b/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp index 854a2ad81..1e8f694a7 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/multi_add.comp @@ -8,9 +8,9 @@ #extension GL_KHR_shader_subgroup_basic : enable #endif -#include "rte.comp" -#include "types.comp" -#include "utils.comp" +#include "rte.glsl" +#include "types.glsl" +#include "utils.glsl" layout (push_constant) uniform parameter2 { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp index 6627a50bd..cc3ea0b76 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable #define BLOCK_SIZE 512 diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp b/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp index e0214fe76..1f05f922c 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp b/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp index 6426dedee..1251f9cc6 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_sgd.comp @@ -1,6 +1,6 @@ #version 450 -#include "generic_head.comp" +#include "generic_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp b/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp index 0d81220c7..f3c817687 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp @@ -1,6 +1,6 @@ #version 450 -#include "types.comp" +#include "types.glsl" layout (push_constant) uniform parameter { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp b/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp index b6124411a..d9d7166e3 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp @@ -1,6 +1,6 @@ #version 450 -#include "types.comp" +#include "types.glsl" #extension GL_EXT_shader_16bit_storage : require diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp b/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp index 145c9fbdc..0f3c6ca87 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp @@ -17,7 +17,7 @@ layout (push_constant) uniform parameter uint ne; } p; -#include "types.comp" +#include "types.glsl" layout(constant_id = 0) const uint GROUP_SIZE = 32; layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp index 0073d8f76..86be2669a 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp @@ -1,9 +1,9 @@ #version 450 -#include "glu_head.comp" +#include "glu_head.glsl" float op(float a, float b) { return max(a, 0.0f) * b; } -#include "glu_main.comp" +#include "glu_main.glsl" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp index 4f806270c..5725cef23 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp b/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp index 1568b141d..8f4b9a868 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp b/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp index d86279934..87df78294 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp index 41197e930..d5b211ffa 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_binary_head.comp" -#include "types.comp" +#include "generic_binary_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable #define BLOCK_SIZE 512 diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp index 76009f3df..87707fc14 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable #define BLOCK_SIZE 512 diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp index ba4677c29..4618b2c7e 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_partials.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_binary_head.comp" -#include "types.comp" +#include "generic_binary_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable #extension GL_KHR_shader_subgroup_arithmetic : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp b/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp index b9abe8ded..68fbd0c7b 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl similarity index 97% rename from ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl index 00e203e73..50fc1f1e2 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.glsl @@ -1,8 +1,8 @@ -#include "types.comp" +#include "types.glsl" #extension GL_EXT_shader_16bit_storage : require -#include "rte.comp" +#include "rte.glsl" layout(local_size_x = 1, local_size_y = 256, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp index 5808710cc..111286b49 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp @@ -1,6 +1,6 @@ #version 450 -#include "rope_head.comp" +#include "rope_head.glsl" void main() { const uint i0 = 2*gl_GlobalInvocationID.y; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp index 366a7b1c4..06e095bef 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp @@ -1,6 +1,6 @@ #version 450 -#include "rope_head.comp" +#include "rope_head.glsl" void main() { const uint i0 = 2*gl_GlobalInvocationID.y; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp index 9643bca96..6ba957540 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp @@ -1,6 +1,6 @@ #version 450 -#include "rope_head.comp" +#include "rope_head.glsl" void main() { const uint i0 = 2*gl_GlobalInvocationID.y; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp index cedacc4d1..d37d1c104 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp @@ -1,6 +1,6 @@ #version 450 -#include "rope_head.comp" +#include "rope_head.glsl" void main() { const uint i0 = 2*gl_GlobalInvocationID.y; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/rte.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp b/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp index f10b0a02b..35ec726a0 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" const uint num_threads = 128; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp b/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp index 5c9e5c350..32298d43c 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp index 4d36f88e0..7d1cc6f45 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp b/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp index f9afa9b13..e5d949ff1 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp b/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp index d7c15a169..61f17b2f0 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp b/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp index 5f20a1ee7..dca0d896b 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp @@ -23,7 +23,7 @@ layout (push_constant) uniform parameter uint has_sinks; } p; -#include "types.comp" +#include "types.glsl" layout(constant_id = 0) const uint BLOCK_SIZE = 32; layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp b/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp index 144ea58e6..d873332ee 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp @@ -2,8 +2,8 @@ #extension GL_EXT_control_flow_attributes : enable -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" layout(constant_id = 0) const uint BLOCK_SIZE = 32; layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp b/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp index 4bc697b9b..70daad6c5 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/sqrt.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/square.comp b/ggml/src/ggml-vulkan/vulkan-shaders/square.comp index ef43598ba..4eb56afcb 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/square.comp @@ -1,7 +1,7 @@ #version 450 -#include "types.comp" -#include "generic_unary_head.comp" +#include "types.glsl" +#include "generic_unary_head.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp b/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp index 72353cc32..bc924b520 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp @@ -2,8 +2,8 @@ #extension GL_EXT_shader_16bit_storage : require -#include "types.comp" -#include "generic_binary_head.comp" +#include "types.glsl" +#include "generic_binary_head.glsl" const uint num_threads = 256; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp b/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp index 759204afa..bc22aa7bd 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp @@ -1,6 +1,6 @@ #version 450 -#include "types.comp" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp index a28e7c6cc..4fee433a1 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp @@ -1,9 +1,9 @@ #version 450 -#include "glu_head.comp" +#include "glu_head.glsl" float op(float a, float b) { return a / (1.0f + exp(-a)) * b; } -#include "glu_main.comp" +#include "glu_main.glsl" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp b/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp index 970750eec..bda9dea21 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/swiglu_oai.comp @@ -1,6 +1,6 @@ #version 450 -#include "glu_head.comp" +#include "glu_head.glsl" float op(float a, float b) { float xi = min(a, p.limit); @@ -11,4 +11,4 @@ float op(float a, float b) { return out_glu; } -#include "glu_main.comp" +#include "glu_main.glsl" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp b/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp index 8a6f868f5..7b5eb413b 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp @@ -1,7 +1,7 @@ #version 450 -#include "generic_head.comp" -#include "types.comp" +#include "generic_head.glsl" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp b/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp index ce8e09442..160556545 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp @@ -9,7 +9,7 @@ layout (push_constant) uniform parameter uint max_period; } p; -#include "types.comp" +#include "types.glsl" #extension GL_EXT_control_flow_attributes : enable #define BLOCK_SIZE 256 diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/types.comp b/ggml/src/ggml-vulkan/vulkan-shaders/types.glsl similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/types.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/types.glsl diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp b/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp index 74771def0..154a2172d 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp @@ -9,7 +9,7 @@ layout (push_constant) uniform parameter float sf0; float sf1; float sf2; float sf3; } p; -#include "types.comp" +#include "types.glsl" layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/utils.comp b/ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl similarity index 100% rename from ggml/src/ggml-vulkan/vulkan-shaders/utils.comp rename to ggml/src/ggml-vulkan/vulkan-shaders/utils.glsl diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp index 84bb9df9a..e2726f1fa 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp @@ -34,13 +34,13 @@ std::mutex lock; std::vector> shader_fnames; +std::locale c_locale("C"); std::string GLSLC = "glslc"; -std::string input_dir = "vulkan-shaders"; +std::string input_filepath = ""; std::string output_dir = "/tmp"; -std::string target_hpp = "ggml-vulkan-shaders.hpp"; -std::string target_cpp = "ggml-vulkan-shaders.cpp"; -bool no_clean = false; +std::string target_hpp = ""; +std::string target_cpp = ""; const std::vector type_names = { "f32", @@ -75,6 +75,7 @@ enum MatMulIdType { }; namespace { + void execute_command(const std::string& command, std::string& stdout_str, std::string& stderr_str) { #ifdef _WIN32 HANDLE stdout_read, stdout_write; @@ -232,16 +233,87 @@ std::string basename(const std::string &path) { return path.substr(path.find_last_of("/\\") + 1); } +std::stringstream make_generic_stringstream() { + std::stringstream ss; + ss.imbue(c_locale); + return ss; +} + +std::string read_binary_file(const std::string& path, bool may_not_exist = false) { + FILE* f = fopen(path.c_str(), "rb"); + if (!f) { + if (!may_not_exist) { + std::cerr << "Error opening file: " << path << " (" << strerror(errno) << ")\n"; + } + return {}; + } + + fseek(f, 0, SEEK_END); + size_t size = ftell(f); + fseek(f, 0, SEEK_SET); + + std::string data(size, '\0'); + size_t read_size = fread(data.data(), 1, size, f); + fclose(f); + if (read_size != size) { + std::cerr << "Error reading file: " << path << " (" << strerror(errno) << ")\n"; + return {}; + } + + return data; +} + +void write_binary_file(const std::string& path, const std::string& content) { + FILE* f = fopen(path.c_str(), "wb"); + if (!f) { + std::cerr << "Error opening file for writing: " << path << " (" << strerror(errno) << ")\n"; + return; + } + + size_t write_size = fwrite(content.data(), 1, content.size(), f); + fclose(f); + if (write_size != content.size()) { + std::cerr << "Error writing file: " << path << " (" << strerror(errno) << ")\n"; + return; + } +} + +void write_file_if_changed(const std::string& path, const std::string& content) { + std::string existing = read_binary_file(path, true); + if (existing != content) { + write_binary_file(path, content); + } +} + + // variables to track number of compiles in progress static uint32_t compile_count = 0; static std::mutex compile_count_mutex; static std::condition_variable compile_count_cond; +static bool generate_dep_file = true; -void string_to_spv_func(const std::string& _name, const std::string& in_fname, const std::map& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) { - std::string name = _name + (f16acc ? "_f16acc" : "") + (coopmat ? "_cm1" : "") + (coopmat2 ? "_cm2" : (fp16 ? "" : "_fp32")); - std::string out_fname = join_paths(output_dir, name + ".spv"); - std::string in_path = join_paths(input_dir, in_fname); +void decrement_compile_count(uint32_t * count) { + if (count) { + std::lock_guard guard(compile_count_mutex); + assert(compile_count > 0); + compile_count--; + compile_count_cond.notify_all(); + } +} +using compile_count_guard = std::unique_ptr; + +compile_count_guard acquire_compile_slot() { + // wait until fewer than N compiles are in progress. + // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors. + uint32_t N = 16; + std::unique_lock guard(compile_count_mutex); + compile_count_cond.wait(guard, [N] { return compile_count < N; }); + compile_count++; + return compile_count_guard(&compile_count, &decrement_compile_count); +} + +void string_to_spv_func(std::string name, std::string in_path, std::string out_path, std::map defines, bool coopmat, bool dep_file, compile_count_guard slot) { std::string target_env = (name.find("_cm2") != std::string::npos) ? "--target-env=vulkan1.3" : "--target-env=vulkan1.2"; // disable spirv-opt for coopmat shaders for https://github.com/ggerganov/llama.cpp/issues/10734 @@ -249,11 +321,17 @@ void string_to_spv_func(const std::string& _name, const std::string& in_fname, c std::string opt_level = (coopmat || name.find("bf16") != std::string::npos) ? "" : "-O"; #ifdef _WIN32 - std::vector cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, "\"" + in_path + "\"", "-o", "\"" + out_fname + "\""}; + std::vector cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, "\"" + in_path + "\"", "-o", "\"" + out_path + "\""}; #else - std::vector cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, in_path, "-o", out_fname}; + std::vector cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, in_path, "-o", out_path}; #endif + if (dep_file) { + cmd.push_back("-MD"); + cmd.push_back("-MF"); + cmd.push_back("\"" + target_cpp + ".d\""); + } + #ifdef GGML_VULKAN_SHADER_DEBUG_INFO cmd.push_back("-g"); #endif @@ -281,17 +359,23 @@ void string_to_spv_func(const std::string& _name, const std::string& in_fname, c return; } + if (dep_file) { + // replace .spv output path with the embed .cpp path which is used as output in CMakeLists.txt + std::string dep = read_binary_file(target_cpp + ".d", true); + if (!dep.empty()) { + size_t pos = dep.find(out_path); + if (pos != std::string::npos) { + dep.replace(pos, out_path.length(), target_cpp); + } + write_binary_file(target_cpp + ".d", dep); + } + } + std::lock_guard guard(lock); - shader_fnames.push_back(std::make_pair(name, out_fname)); + shader_fnames.push_back(std::make_pair(name, out_path)); } catch (const std::exception& e) { std::cerr << "Error executing command for " << name << ": " << e.what() << std::endl; } - { - std::lock_guard guard(compile_count_mutex); - assert(compile_count > 0); - compile_count--; - } - compile_count_cond.notify_all(); } std::map merge_maps(const std::map& a, const std::map& b) { @@ -301,18 +385,24 @@ std::map merge_maps(const std::map> compiles; -void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) { - { - // wait until fewer than N compiles are in progress. - // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors. - uint32_t N = 16; - std::unique_lock guard(compile_count_mutex); - while (compile_count >= N) { - compile_count_cond.wait(guard); - } - compile_count++; +void string_to_spv(std::string name, const std::string& source, const std::map& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) { + name = name + (f16acc ? "_f16acc" : "") + (coopmat ? "_cm1" : "") + (coopmat2 ? "_cm2" : (fp16 ? "" : "_fp32")); + std::string out_path = join_paths(output_dir, name + ".spv"); + + if (input_filepath == "") { + // No input source to compile, only generate header for all shaders + shader_fnames.push_back(std::pair(name, out_path)); + return; + } else if (basename(input_filepath) != source) { + // Only compile shader variants matching the input filename + return; } - compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc)); + + compile_count_guard slot = acquire_compile_slot(); + compiles.push_back(std::async( + string_to_spv_func, name, input_filepath, out_path, defines, coopmat, generate_dep_file, std::move(slot))); + // Don't write the same dep file from multiple processes + generate_dep_file = false; } void matmul_shaders(bool fp16, MatMulIdType matmul_id_type, bool coopmat, bool coopmat2, bool f16acc) { @@ -485,7 +575,6 @@ void matmul_shaders(bool fp16, MatMulIdType matmul_id_type, bool coopmat, bool c } void process_shaders() { - std::cout << "ggml_vulkan: Generating and compiling shaders to SPIR-V" << std::endl; std::map base_dict = {{"FLOAT_TYPE", "float"}}; // matmul @@ -837,11 +926,11 @@ void process_shaders() { } void write_output_files() { - FILE* hdr = fopen(target_hpp.c_str(), "w"); - FILE* src = fopen(target_cpp.c_str(), "w"); + std::stringstream hdr = make_generic_stringstream(); + std::stringstream src = make_generic_stringstream(); - fprintf(hdr, "#include \n\n"); - fprintf(src, "#include \"%s\"\n\n", basename(target_hpp).c_str()); + hdr << "#include \n\n"; + src << "#include \"" << basename(target_hpp) << "\"\n\n"; std::sort(shader_fnames.begin(), shader_fnames.end()); for (const auto& pair : shader_fnames) { @@ -853,91 +942,85 @@ void write_output_files() { const std::string& path = pair.second; #endif - FILE* spv = fopen(path.c_str(), "rb"); - if (!spv) { - std::cerr << "Error opening SPIR-V file: " << path << " (" << strerror(errno) << ")\n"; - continue; - } + hdr << "extern const uint64_t " << name << "_len;\n"; + hdr << "extern const unsigned char " << name << "_data[];\n\n"; - fseek(spv, 0, SEEK_END); - size_t size = ftell(spv); - fseek(spv, 0, SEEK_SET); + if (input_filepath != "") { + std::string data = read_binary_file(path); + if (data.empty()) { + continue; + } - std::vector data(size); - size_t read_size = fread(data.data(), 1, size, spv); - fclose(spv); - if (read_size != size) { - std::cerr << "Error reading SPIR-V file: " << path << " (" << strerror(errno) << ")\n"; - continue; - } - - fprintf(hdr, "extern unsigned char %s_data[%zu];\n", name.c_str(), size); - fprintf(hdr, "const uint64_t %s_len = %zu;\n\n", name.c_str(), size); - - fprintf(src, "unsigned char %s_data[%zu] = {\n", name.c_str(), size); - for (size_t i = 0; i < size; ++i) { - fprintf(src, "0x%02x,", data[i]); - if ((i + 1) % 12 == 0) fprintf(src, "\n"); - } - fprintf(src, "\n};\n\n"); - - if (!no_clean) { - std::remove(path.c_str()); + src << "const uint64_t " << name << "_len = " << data.size() << ";\n"; + src << "const unsigned char " << name << "_data[" << data.size() << "] = {\n" << std::hex; + auto bytes = reinterpret_cast(data.data()); + for (size_t i = 0; i < data.size(); ++i) { + src << "0x" << static_cast(bytes[i]) << ","; + if ((i + 1) % 12 == 0) src << "\n"; + } + src << std::dec << "\n};\n\n"; } } std::string suffixes[2] = {"_f32", "_f16"}; - for (const char *op : {"add", "sub", "mul", "div", "add_rms"}) { - fprintf(hdr, "extern unsigned char *%s_data[2][2][2][2];\n", op); - fprintf(hdr, "extern uint64_t %s_len[2][2][2][2];\n", op); - std::string data = "unsigned char *" + std::string(op) + "_data[2][2][2][2] = "; - std::string len = "uint64_t " + std::string(op) + "_len[2][2][2][2] = "; + for (auto op : {"add", "sub", "mul", "div", "add_rms"}) { + hdr << "extern const void * " << op << "_data[2][2][2][2];\n"; + hdr << "extern const uint64_t " << op << "_len[2][2][2][2];\n"; + + std::string op_file = op == "add_rms" ? "add.comp" : std::string(op) + ".comp"; + if (basename(input_filepath) != op_file) { + continue; + } + std::stringstream data = make_generic_stringstream(); + std::stringstream len = make_generic_stringstream(); + data << "const void * " << op << "_data[2][2][2][2] = "; + len << "const uint64_t " << op << "_len[2][2][2][2] = "; for (uint32_t t0 = 0; t0 < 2; ++t0) { if (t0 == 0) { - data += "{"; - len += "{"; + data << "{"; + len << "{"; } for (uint32_t t1 = 0; t1 < 2; ++t1) { if (t1 == 0) { - data += "{"; - len += "{"; + data << "{"; + len << "{"; } for (uint32_t t2 = 0; t2 < 2; ++t2) { if (t2 == 0) { - data += "{"; - len += "{"; + data << "{"; + len << "{"; } for (uint32_t rte = 0; rte < 2; ++rte) { if (rte == 0) { - data += "{"; - len += "{"; + data << "{"; + len << "{"; } - data += op + suffixes[t0] + suffixes[t1] + suffixes[t2] + ((rte != 0) ? "_rte" : ""); - len += op + suffixes[t0] + suffixes[t1] + suffixes[t2] + ((rte != 0) ? "_rte" : ""); - data += "_data,"; - len += "_len,"; + data << op << suffixes[t0] << suffixes[t1] << suffixes[t2] << ((rte != 0) ? "_rte" : ""); + len << op << suffixes[t0] << suffixes[t1] << suffixes[t2] << ((rte != 0) ? "_rte" : ""); + data << "_data,"; + len << "_len,"; if (rte == 1) { - data += "}, "; - len += "}, "; + data << "}, "; + len << "}, "; } } if (t2 == 1) { - data += "}, "; - len += "}, "; + data << "}, "; + len << "}, "; } } if (t1 == 1) { - data += "}, "; - len += "}, "; + data << "}, "; + len << "}, "; } } if (t0 == 1) { - data += "};\n"; - len += "};\n"; + data << "};\n"; + len << "};\n"; } } - fputs(data.c_str(), src); - fputs(len.c_str(), src); + src << data.str(); + src << len.str(); } std::vector btypes = {"f16", "f32"}; @@ -951,20 +1034,25 @@ void write_output_files() { if (btype == "q8_1" && !is_legacy_quant(tname)) { continue; } - fprintf(hdr, "extern unsigned char *arr_dmmv_%s_%s_f32_data[3];\n", tname.c_str(), btype.c_str()); - fprintf(hdr, "extern uint64_t arr_dmmv_%s_%s_f32_len[3];\n", tname.c_str(), btype.c_str()); - std::string data = "unsigned char *arr_dmmv_" + tname + "_" + btype + "_f32_data[3] = {mul_mat_vec_" + tname + "_" + btype + "_f32_data, mul_mat_vec_" + tname + "_" + btype + "_f32_subgroup_data, mul_mat_vec_" + tname + "_" + btype + "_f32_subgroup_no_shmem_data};\n"; - std::string len = "uint64_t arr_dmmv_" + tname + "_" + btype + "_f32_len[3] = {mul_mat_vec_" + tname + "_" + btype + "_f32_len, mul_mat_vec_" + tname + "_" + btype + "_f32_subgroup_len, mul_mat_vec_" + tname + "_" + btype + "_f32_subgroup_no_shmem_len};\n"; - fputs(data.c_str(), src); - fputs(len.c_str(), src); + hdr << "extern const void * arr_dmmv_" << tname << "_" << btype << "_f32_data[3];\n"; + hdr << "extern const uint64_t arr_dmmv_" << tname << "_" << btype << "_f32_len[3];\n"; + if (basename(input_filepath) == "mul_mat_vec.comp") { + src << "const void * arr_dmmv_" << tname << "_" << btype << "_f32_data[3] = {mul_mat_vec_" << tname << "_" << btype << "_f32_data, mul_mat_vec_" << tname << "_" << btype << "_f32_subgroup_data, mul_mat_vec_" << tname << "_" << btype << "_f32_subgroup_no_shmem_data};\n"; + src << "const uint64_t arr_dmmv_" << tname << "_" << btype << "_f32_len[3] = {mul_mat_vec_" << tname << "_" << btype << "_f32_len, mul_mat_vec_" << tname << "_" << btype << "_f32_subgroup_len, mul_mat_vec_" << tname << "_" << btype << "_f32_subgroup_no_shmem_len};\n"; + } } } - fclose(hdr); - fclose(src); -} + if (input_filepath == "") { + write_file_if_changed(target_hpp, hdr.str()); + } + if (target_cpp != "") { + write_binary_file(target_cpp, src.str()); + } } +} // namespace + int main(int argc, char** argv) { std::map args; for (int i = 1; i < argc; ++i) { @@ -982,8 +1070,8 @@ int main(int argc, char** argv) { if (args.find("--glslc") != args.end()) { GLSLC = args["--glslc"]; // Path to glslc } - if (args.find("--input-dir") != args.end()) { - input_dir = args["--input-dir"]; // Directory containing shader sources + if (args.find("--source") != args.end()) { + input_filepath = args["--source"]; // The shader source file to compile } if (args.find("--output-dir") != args.end()) { output_dir = args["--output-dir"]; // Directory for containing SPIR-V output @@ -994,14 +1082,6 @@ int main(int argc, char** argv) { if (args.find("--target-cpp") != args.end()) { target_cpp = args["--target-cpp"]; // Path to generated cpp file } - if (args.find("--no-clean") != args.end()) { - no_clean = true; // Keep temporary SPIR-V files in output-dir after build - } - - if (!directory_exists(input_dir)) { - std::cerr << "\"" << input_dir << "\" must be a valid directory containing shader sources" << std::endl; - return EXIT_FAILURE; - } if (!directory_exists(output_dir)) { if (!create_directory(output_dir)) { From 898acba6816ad23b6a9491347d30e7570bffadfd Mon Sep 17 00:00:00 2001 From: Radoslav Gerganov Date: Sat, 4 Oct 2025 12:49:16 +0300 Subject: [PATCH 2/4] rpc : add support for multiple devices (#16276) * rpc : add support for multiple devices Allow rpc-server to expose multiple devices from a single endpoint. Change RPC protocol to include device identifier where needed. closes: #15210 * fixes * use ggml_backend_reg_t * address review comments * fix llama-bench backend report * address review comments, change device naming * fix cmd order --- common/arg.cpp | 16 +- ggml/include/ggml-backend.h | 2 + ggml/include/ggml-rpc.h | 17 +- ggml/src/ggml-backend-impl.h | 3 - ggml/src/ggml-rpc/ggml-rpc.cpp | 401 +++++++++++++++++++++--------- tools/llama-bench/llama-bench.cpp | 51 ++-- tools/rpc/rpc-server.cpp | 158 ++++++------ 7 files changed, 403 insertions(+), 245 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 577048c20..a020ac441 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1615,18 +1615,14 @@ static void add_rpc_devices(const std::string & servers) { if (!rpc_reg) { throw std::invalid_argument("failed to find RPC backend"); } - typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint); - ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device"); - if (!ggml_backend_rpc_add_device_fn) { - throw std::invalid_argument("failed to find RPC device add function"); + typedef ggml_backend_reg_t (*ggml_backend_rpc_add_server_t)(const char * endpoint); + ggml_backend_rpc_add_server_t ggml_backend_rpc_add_server_fn = (ggml_backend_rpc_add_server_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server"); + if (!ggml_backend_rpc_add_server_fn) { + throw std::invalid_argument("failed to find RPC add server function"); } for (const auto & server : rpc_servers) { - ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str()); - if (dev) { - ggml_backend_device_register(dev); - } else { - throw std::invalid_argument("failed to register RPC device"); - } + auto reg = ggml_backend_rpc_add_server_fn(server.c_str()); + ggml_backend_register(reg); } } diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h index 62b6d65e5..f1b740785 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -215,6 +215,8 @@ extern "C" { // Backend registry // + GGML_API void ggml_backend_register(ggml_backend_reg_t reg); + GGML_API void ggml_backend_device_register(ggml_backend_dev_t device); // Backend (reg) enumeration diff --git a/ggml/include/ggml-rpc.h b/ggml/include/ggml-rpc.h index 1e6741127..72eff0027 100644 --- a/ggml/include/ggml-rpc.h +++ b/ggml/include/ggml-rpc.h @@ -7,26 +7,25 @@ extern "C" { #endif -#define RPC_PROTO_MAJOR_VERSION 2 +#define RPC_PROTO_MAJOR_VERSION 3 #define RPC_PROTO_MINOR_VERSION 0 #define RPC_PROTO_PATCH_VERSION 0 #define GGML_RPC_MAX_SERVERS 16 // backend API -GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint); +GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint, uint32_t device); GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend); -GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint); +GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint, uint32_t device); -GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total); +GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total); -GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, - const char * cache_dir, - size_t free_mem, size_t total_mem); +GGML_BACKEND_API void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir, + size_t n_threads, size_t n_devices, + ggml_backend_dev_t * devices, size_t * free_mem, size_t * total_mem); GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void); - -GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint); +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint); #ifdef __cplusplus } diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h index 07784d6f6..6792ba986 100644 --- a/ggml/src/ggml-backend-impl.h +++ b/ggml/src/ggml-backend-impl.h @@ -209,9 +209,6 @@ extern "C" { void * context; }; - // Internal backend registry API - GGML_API void ggml_backend_register(ggml_backend_reg_t reg); - // Add backend dynamic loading support to the backend // Initialize the backend diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index f99681c84..1a8739e78 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -105,9 +105,12 @@ enum rpc_cmd { RPC_CMD_INIT_TENSOR, RPC_CMD_GET_ALLOC_SIZE, RPC_CMD_HELLO, + RPC_CMD_DEVICE_COUNT, RPC_CMD_COUNT, }; +static_assert(RPC_CMD_HELLO == 14, "RPC_CMD_HELLO must be always 14"); + // Try RPC_CMD_SET_TENSOR_HASH first when data size is larger than this threshold const size_t HASH_THRESHOLD = 10 * 1024 * 1024; @@ -117,7 +120,12 @@ struct rpc_msg_hello_rsp { uint8_t patch; }; +struct rpc_msg_device_count_rsp { + uint32_t device_count; +}; + struct rpc_msg_get_alloc_size_req { + uint32_t device; rpc_tensor tensor; }; @@ -130,6 +138,7 @@ struct rpc_msg_init_tensor_req { }; struct rpc_msg_alloc_buffer_req { + uint32_t device; uint64_t size; }; @@ -138,10 +147,18 @@ struct rpc_msg_alloc_buffer_rsp { uint64_t remote_size; }; +struct rpc_msg_get_alignment_req { + uint32_t device; +}; + struct rpc_msg_get_alignment_rsp { uint64_t alignment; }; +struct rpc_msg_get_max_size_req { + uint32_t device; +}; + struct rpc_msg_get_max_size_rsp { uint64_t max_size; }; @@ -192,6 +209,10 @@ struct rpc_msg_graph_compute_rsp { uint8_t result; }; +struct rpc_msg_get_device_memory_req { + uint32_t device; +}; + struct rpc_msg_get_device_memory_rsp { uint64_t free_mem; uint64_t total_mem; @@ -207,13 +228,15 @@ static ggml_guid_t ggml_backend_rpc_guid() { struct ggml_backend_rpc_buffer_type_context { std::string endpoint; + uint32_t device; std::string name; - size_t alignment; - size_t max_size; + size_t alignment; + size_t max_size; }; struct ggml_backend_rpc_context { std::string endpoint; + uint32_t device; std::string name; }; @@ -653,7 +676,7 @@ static const char * ggml_backend_rpc_buffer_type_name(ggml_backend_buffer_type_t static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { ggml_backend_rpc_buffer_type_context * buft_ctx = (ggml_backend_rpc_buffer_type_context *)buft->context; - rpc_msg_alloc_buffer_req request = {size}; + rpc_msg_alloc_buffer_req request = {buft_ctx->device, size}; rpc_msg_alloc_buffer_rsp response; auto sock = get_socket(buft_ctx->endpoint); bool status = send_rpc_cmd(sock, RPC_CMD_ALLOC_BUFFER, &request, sizeof(request), &response, sizeof(response)); @@ -669,9 +692,10 @@ static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer(ggml_back } } -static size_t get_alignment(const std::shared_ptr & sock) { +static size_t get_alignment(const std::shared_ptr & sock, uint32_t device) { + rpc_msg_get_alignment_req request = {device}; rpc_msg_get_alignment_rsp response; - bool status = send_rpc_cmd(sock, RPC_CMD_GET_ALIGNMENT, nullptr, 0, &response, sizeof(response)); + bool status = send_rpc_cmd(sock, RPC_CMD_GET_ALIGNMENT, &request, sizeof(request), &response, sizeof(response)); RPC_STATUS_ASSERT(status); return response.alignment; } @@ -681,9 +705,10 @@ static size_t ggml_backend_rpc_buffer_type_get_alignment(ggml_backend_buffer_typ return buft_ctx->alignment; } -static size_t get_max_size(const std::shared_ptr & sock) { +static size_t get_max_size(const std::shared_ptr & sock, uint32_t device) { + rpc_msg_get_max_size_req request = {device}; rpc_msg_get_max_size_rsp response; - bool status = send_rpc_cmd(sock, RPC_CMD_GET_MAX_SIZE, nullptr, 0, &response, sizeof(response)); + bool status = send_rpc_cmd(sock, RPC_CMD_GET_MAX_SIZE, &request, sizeof(request), &response, sizeof(response)); RPC_STATUS_ASSERT(status); return response.max_size; } @@ -700,7 +725,7 @@ static size_t ggml_backend_rpc_buffer_type_get_alloc_size(ggml_backend_buffer_ty auto sock = get_socket(buft_ctx->endpoint); rpc_msg_get_alloc_size_req request; - + request.device = buft_ctx->device; request.tensor = serialize_tensor(tensor); rpc_msg_get_alloc_size_rsp response; @@ -754,7 +779,7 @@ static void add_tensor(ggml_tensor * tensor, std::vector & tensors, tensors.push_back(serialize_tensor(tensor)); } -static void serialize_graph(const ggml_cgraph * cgraph, std::vector & output) { +static void serialize_graph(uint32_t device, const ggml_cgraph * cgraph, std::vector & output) { uint32_t n_nodes = cgraph->n_nodes; std::vector tensors; std::unordered_set visited; @@ -762,24 +787,29 @@ static void serialize_graph(const ggml_cgraph * cgraph, std::vector & o add_tensor(cgraph->nodes[i], tensors, visited); } // serialization format: - // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) | + // | device (4 bytes) | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) | uint32_t n_tensors = tensors.size(); - int output_size = sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(rpc_tensor); + int output_size = 2*sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(rpc_tensor); output.resize(output_size, 0); - memcpy(output.data(), &n_nodes, sizeof(n_nodes)); + uint8_t * dest = output.data(); + memcpy(dest, &device, sizeof(device)); + dest += sizeof(device); + memcpy(dest, &n_nodes, sizeof(n_nodes)); + dest += sizeof(n_nodes); for (uint32_t i = 0; i < n_nodes; i++) { - memcpy(output.data() + sizeof(n_nodes) + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t)); + memcpy(dest + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t)); } - uint32_t * out_ntensors = (uint32_t *)(output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t)); - *out_ntensors = n_tensors; - rpc_tensor * out_tensors = (rpc_tensor *)(output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t)); + dest += n_nodes * sizeof(uint64_t); + memcpy(dest, &n_tensors, sizeof(n_tensors)); + dest += sizeof(n_tensors); + rpc_tensor * out_tensors = (rpc_tensor *)dest; memcpy(out_tensors, tensors.data(), n_tensors * sizeof(rpc_tensor)); } static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { ggml_backend_rpc_context * rpc_ctx = (ggml_backend_rpc_context *)backend->context; std::vector input; - serialize_graph(cgraph, input); + serialize_graph(rpc_ctx->device, cgraph, input); rpc_msg_graph_compute_rsp response; auto sock = get_socket(rpc_ctx->endpoint); bool status = send_rpc_cmd(sock, RPC_CMD_GRAPH_COMPUTE, input.data(), input.size(), &response, sizeof(response)); @@ -804,12 +834,13 @@ static ggml_backend_i ggml_backend_rpc_interface = { /* .graph_optimize = */ NULL, }; -ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) { +ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint, uint32_t device) { static std::mutex mutex; std::lock_guard lock(mutex); + std::string buft_name = "RPC" + std::to_string(device) + "[" + std::string(endpoint) + "]"; // NOTE: buffer types are allocated and never freed; this is by design static std::unordered_map buft_map; - auto it = buft_map.find(endpoint); + auto it = buft_map.find(buft_name); if (it != buft_map.end()) { return it->second; } @@ -818,34 +849,37 @@ ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) { GGML_LOG_ERROR("Failed to connect to %s\n", endpoint); return nullptr; } - size_t alignment = get_alignment(sock); - size_t max_size = get_max_size(sock); + size_t alignment = get_alignment(sock, device); + size_t max_size = get_max_size(sock, device); ggml_backend_rpc_buffer_type_context * buft_ctx = new ggml_backend_rpc_buffer_type_context { /* .endpoint = */ endpoint, - /* .name = */ "RPC[" + std::string(endpoint) + "]", + /* .device = */ device, + /* .name = */ buft_name, /* .alignment = */ alignment, /* .max_size = */ max_size }; - + auto reg = ggml_backend_rpc_add_server(endpoint); ggml_backend_buffer_type_t buft = new ggml_backend_buffer_type { /* .iface = */ ggml_backend_rpc_buffer_type_interface, - /* .device = */ ggml_backend_rpc_add_device(endpoint), + /* .device = */ ggml_backend_reg_dev_get(reg, device), /* .context = */ buft_ctx }; - buft_map[endpoint] = buft; + buft_map[buft_name] = buft; return buft; } -ggml_backend_t ggml_backend_rpc_init(const char * endpoint) { +ggml_backend_t ggml_backend_rpc_init(const char * endpoint, uint32_t device) { + std::string dev_name = "RPC" + std::to_string(device) + "[" + std::string(endpoint) + "]"; ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context { - /* .endpoint = */ endpoint, - /* .name = */ "RPC[" + std::string(endpoint) + "]", + /* .endpoint = */ endpoint, + /* .device = */ device, + /* .name = */ dev_name }; - + auto reg = ggml_backend_rpc_add_server(endpoint); ggml_backend_t backend = new ggml_backend { /* .guid = */ ggml_backend_rpc_guid(), /* .iface = */ ggml_backend_rpc_interface, - /* .device = */ ggml_backend_rpc_add_device(endpoint), + /* .device = */ ggml_backend_reg_dev_get(reg, device), /* .context = */ ctx }; return backend; @@ -855,37 +889,39 @@ bool ggml_backend_is_rpc(ggml_backend_t backend) { return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_rpc_guid()); } -static void get_device_memory(const std::shared_ptr & sock, size_t * free, size_t * total) { +static void get_device_memory(const std::shared_ptr & sock, uint32_t device, size_t * free, size_t * total) { + rpc_msg_get_device_memory_req request; + request.device = device; rpc_msg_get_device_memory_rsp response; - bool status = send_rpc_cmd(sock, RPC_CMD_GET_DEVICE_MEMORY, nullptr, 0, &response, sizeof(response)); + bool status = send_rpc_cmd(sock, RPC_CMD_GET_DEVICE_MEMORY, &request, sizeof(request), &response, sizeof(response)); RPC_STATUS_ASSERT(status); *free = response.free_mem; *total = response.total_mem; } -void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total) { +void ggml_backend_rpc_get_device_memory(const char * endpoint, uint32_t device, size_t * free, size_t * total) { auto sock = get_socket(endpoint); if (sock == nullptr) { *free = 0; *total = 0; return; } - get_device_memory(sock, free, total); + get_device_memory(sock, device, free, total); } // RPC server-side implementation class rpc_server { public: - rpc_server(ggml_backend_t backend, const char * cache_dir) - : backend(backend), cache_dir(cache_dir) { + rpc_server(std::vector backends, const char * cache_dir) + : backends(std::move(backends)), cache_dir(cache_dir) { } ~rpc_server(); void hello(rpc_msg_hello_rsp & response); - void alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_alloc_buffer_rsp & response); - void get_alignment(rpc_msg_get_alignment_rsp & response); - void get_max_size(rpc_msg_get_max_size_rsp & response); + bool alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_alloc_buffer_rsp & response); + bool get_alignment(const rpc_msg_get_alignment_req & request, rpc_msg_get_alignment_rsp & response); + bool get_max_size(const rpc_msg_get_max_size_req & request, rpc_msg_get_max_size_rsp & response); bool buffer_get_base(const rpc_msg_buffer_get_base_req & request, rpc_msg_buffer_get_base_rsp & response); bool free_buffer(const rpc_msg_free_buffer_req & request); bool buffer_clear(const rpc_msg_buffer_clear_req & request); @@ -906,7 +942,7 @@ private: std::unordered_map & tensor_map); - ggml_backend_t backend; + std::vector backends; const char * cache_dir; std::unordered_set buffers; }; @@ -919,6 +955,10 @@ void rpc_server::hello(rpc_msg_hello_rsp & response) { } bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_msg_get_alloc_size_rsp & response) { + uint32_t dev_id = request.device; + if (dev_id >= backends.size()) { + return false; + } ggml_backend_buffer_type_t buft; struct ggml_init_params params { /*.mem_size =*/ ggml_tensor_overhead(), @@ -935,10 +975,10 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_ GGML_LOG_ERROR("Null tensor pointer passed to server get_alloc_size function.\n"); return false; } - LOG_DBG("[%s] buffer: %p, data: %p\n", __func__, (void*)tensor->buffer, tensor->data); + LOG_DBG("[%s] device: %d, buffer: %p, data: %p\n", __func__, dev_id, (void*)tensor->buffer, tensor->data); if (tensor->buffer == nullptr) { //No buffer allocated. - buft = ggml_backend_get_default_buffer_type(backend); + buft = ggml_backend_get_default_buffer_type(backends[dev_id]); } else { buft = tensor->buffer->buft; } @@ -948,33 +988,49 @@ bool rpc_server::get_alloc_size(const rpc_msg_get_alloc_size_req & request, rpc_ return true; } -void rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_alloc_buffer_rsp & response) { - ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend); +bool rpc_server::alloc_buffer(const rpc_msg_alloc_buffer_req & request, rpc_msg_alloc_buffer_rsp & response) { + uint32_t dev_id = request.device; + if (dev_id >= backends.size()) { + return false; + } + ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backends[dev_id]); ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, request.size); response.remote_ptr = 0; response.remote_size = 0; if (buffer != nullptr) { response.remote_ptr = reinterpret_cast(buffer); response.remote_size = buffer->size; - LOG_DBG("[%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 ", remote_size: %" PRIu64 "\n", __func__, request.size, response.remote_ptr, response.remote_size); + LOG_DBG("[%s] device: %d, size: %" PRIu64 " -> remote_ptr: %" PRIx64 ", remote_size: %" PRIu64 "\n", + __func__, dev_id, request.size, response.remote_ptr, response.remote_size); buffers.insert(buffer); } else { - LOG_DBG("[%s] size: %" PRIu64 " -> failed\n", __func__, request.size); + LOG_DBG("[%s] device: %d, size: %" PRIu64 " -> failed\n", __func__, dev_id, request.size); } + return true; } -void rpc_server::get_alignment(rpc_msg_get_alignment_rsp & response) { - ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend); +bool rpc_server::get_alignment(const rpc_msg_get_alignment_req & request, rpc_msg_get_alignment_rsp & response) { + uint32_t dev_id = request.device; + if (dev_id >= backends.size()) { + return false; + } + ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backends[dev_id]); size_t alignment = ggml_backend_buft_get_alignment(buft); - LOG_DBG("[%s] alignment: %lu\n", __func__, alignment); + LOG_DBG("[%s] device: %d, alignment: %lu\n", __func__, dev_id, alignment); response.alignment = alignment; + return true; } -void rpc_server::get_max_size(rpc_msg_get_max_size_rsp & response) { - ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend); +bool rpc_server::get_max_size(const rpc_msg_get_max_size_req & request, rpc_msg_get_max_size_rsp & response) { + uint32_t dev_id = request.device; + if (dev_id >= backends.size()) { + return false; + } + ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backends[dev_id]); size_t max_size = ggml_backend_buft_get_max_size(buft); - LOG_DBG("[%s] max_size: %lu\n", __func__, max_size); + LOG_DBG("[%s] device: %d, max_size: %lu\n", __func__, dev_id, max_size); response.max_size = max_size; + return true; } bool rpc_server::buffer_get_base(const rpc_msg_buffer_get_base_req & request, rpc_msg_buffer_get_base_rsp & response) { @@ -1332,23 +1388,33 @@ ggml_tensor * rpc_server::create_node(uint64_t id, bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph_compute_rsp & response) { // serialization format: - // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) | - if (input.size() < sizeof(uint32_t)) { + // | device (4 bytes) | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(rpc_tensor)) | + if (input.size() < 2*sizeof(uint32_t)) { + return false; + } + const uint8_t * src = input.data(); + uint32_t device; + memcpy(&device, src, sizeof(device)); + src += sizeof(device); + if (device >= backends.size()) { return false; } uint32_t n_nodes; - memcpy(&n_nodes, input.data(), sizeof(n_nodes)); - if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t)) { + memcpy(&n_nodes, src, sizeof(n_nodes)); + src += sizeof(n_nodes); + if (input.size() < 2*sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t)) { return false; } - const uint64_t * nodes = (const uint64_t *)(input.data() + sizeof(n_nodes)); + const uint64_t * nodes = (const uint64_t *)src; + src += n_nodes*sizeof(uint64_t); uint32_t n_tensors; - memcpy(&n_tensors, input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t), sizeof(n_tensors)); - if (input.size() < sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t) + n_tensors*sizeof(rpc_tensor)) { + memcpy(&n_tensors, src, sizeof(n_tensors)); + src += sizeof(n_tensors); + if (input.size() < 2*sizeof(uint32_t) + n_nodes*sizeof(uint64_t) + sizeof(uint32_t) + n_tensors*sizeof(rpc_tensor)) { return false; } - const rpc_tensor * tensors = (const rpc_tensor *)(input.data() + sizeof(n_nodes) + n_nodes*sizeof(uint64_t) + sizeof(n_tensors)); - LOG_DBG("[%s] n_nodes: %u, n_tensors: %u\n", __func__, n_nodes, n_tensors); + const rpc_tensor * tensors = (const rpc_tensor *)src; + LOG_DBG("[%s] device: %u, n_nodes: %u, n_tensors: %u\n", __func__, device, n_nodes, n_tensors); size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); @@ -1380,7 +1446,7 @@ bool rpc_server::graph_compute(const std::vector & input, rpc_msg_graph return false; } } - ggml_status status = ggml_backend_graph_compute(backend, graph); + ggml_status status = ggml_backend_graph_compute(backends[device], graph); response.result = status; return true; } @@ -1391,9 +1457,9 @@ rpc_server::~rpc_server() { } } -static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir, - sockfd_t sockfd, size_t free_mem, size_t total_mem) { - rpc_server server(backend, cache_dir); +static void rpc_serve_client(const std::vector & backends, const char * cache_dir, + sockfd_t sockfd, const std::vector & free_mem, const std::vector & total_mem) { + rpc_server server(backends, cache_dir); uint8_t cmd; if (!recv_data(sockfd, &cmd, 1)) { return; @@ -1425,13 +1491,26 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir, // HELLO command is handled above return; } + case RPC_CMD_DEVICE_COUNT: { + if (!recv_msg(sockfd, nullptr, 0)) { + return; + } + rpc_msg_device_count_rsp response; + response.device_count = backends.size(); + if (!send_msg(sockfd, &response, sizeof(response))) { + return; + } + break; + } case RPC_CMD_ALLOC_BUFFER: { rpc_msg_alloc_buffer_req request; if (!recv_msg(sockfd, &request, sizeof(request))) { return; } rpc_msg_alloc_buffer_rsp response; - server.alloc_buffer(request, response); + if (!server.alloc_buffer(request, response)) { + return; + } if (!send_msg(sockfd, &response, sizeof(response))) { return; } @@ -1452,22 +1531,28 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir, break; } case RPC_CMD_GET_ALIGNMENT: { - if (!recv_msg(sockfd, nullptr, 0)) { + rpc_msg_get_alignment_req request; + if (!recv_msg(sockfd, &request, sizeof(request))) { return; } rpc_msg_get_alignment_rsp response; - server.get_alignment(response); + if (!server.get_alignment(request, response)) { + return; + } if (!send_msg(sockfd, &response, sizeof(response))) { return; } break; } case RPC_CMD_GET_MAX_SIZE: { - if (!recv_msg(sockfd, nullptr, 0)) { + rpc_msg_get_max_size_req request; + if (!recv_msg(sockfd, &request, sizeof(request))) { return; } rpc_msg_get_max_size_rsp response; - server.get_max_size(response); + if (!server.get_max_size(request, response)) { + return; + } if (!send_msg(sockfd, &response, sizeof(response))) { return; } @@ -1593,12 +1678,19 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir, break; } case RPC_CMD_GET_DEVICE_MEMORY: { - if (!recv_msg(sockfd, nullptr, 0)) { + rpc_msg_get_device_memory_req request; + if (!recv_msg(sockfd, &request, sizeof(request))) { + return; + } + auto dev_id = request.device; + if (dev_id >= backends.size()) { return; } rpc_msg_get_device_memory_rsp response; - response.free_mem = free_mem; - response.total_mem = total_mem; + response.free_mem = free_mem[dev_id]; + response.total_mem = total_mem[dev_id]; + LOG_DBG("[get_device_mem] device: %u, free_mem: %" PRIu64 ", total_mem: %" PRIu64 "\n", dev_id, + response.free_mem, response.total_mem); if (!send_msg(sockfd, &response, sizeof(response))) { return; } @@ -1612,16 +1704,41 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir, } } -void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, - const char * cache_dir, - size_t free_mem, size_t total_mem) { +void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir, + size_t n_threads, size_t n_devices, + ggml_backend_dev_t * devices, size_t * free_mem, size_t * total_mem) { + if (n_devices == 0 || devices == nullptr || free_mem == nullptr || total_mem == nullptr) { + fprintf(stderr, "Invalid arguments to ggml_backend_rpc_start_server\n"); + return; + } + std::vector backends; + std::vector free_mem_vec(free_mem, free_mem + n_devices); + std::vector total_mem_vec(total_mem, total_mem + n_devices); printf("Starting RPC server v%d.%d.%d\n", RPC_PROTO_MAJOR_VERSION, RPC_PROTO_MINOR_VERSION, RPC_PROTO_PATCH_VERSION); printf(" endpoint : %s\n", endpoint); printf(" local cache : %s\n", cache_dir ? cache_dir : "n/a"); - printf(" backend memory : %zu MB\n", free_mem / (1024 * 1024)); + printf("Devices:\n"); + for (size_t i = 0; i < n_devices; i++) { + auto dev = devices[i]; + printf(" %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), + total_mem[i] / 1024 / 1024, free_mem[i] / 1024 / 1024); + auto backend = ggml_backend_dev_init(dev, nullptr); + if (!backend) { + fprintf(stderr, "Failed to create backend for device %s\n", dev->iface.get_name(dev)); + return; + } + backends.push_back(backend); + ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr; + if (reg) { + auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads"); + if (ggml_backend_set_n_threads_fn) { + ggml_backend_set_n_threads_fn(backend, n_threads); + } + } + } std::string host; int port; @@ -1649,22 +1766,27 @@ void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint fprintf(stderr, "Failed to accept client connection\n"); return; } - printf("Accepted client connection, free_mem=%zu, total_mem=%zu\n", free_mem, total_mem); + printf("Accepted client connection\n"); fflush(stdout); - rpc_serve_client(backend, cache_dir, client_socket->fd, free_mem, total_mem); + rpc_serve_client(backends, cache_dir, client_socket->fd, free_mem_vec, total_mem_vec); printf("Client connection closed\n"); fflush(stdout); } #ifdef _WIN32 WSACleanup(); #endif + for (auto backend : backends) { + ggml_backend_free(backend); + } } // device interface struct ggml_backend_rpc_device_context { std::string endpoint; + uint32_t device; std::string name; + std::string description; }; static const char * ggml_backend_rpc_device_get_name(ggml_backend_dev_t dev) { @@ -1676,15 +1798,13 @@ static const char * ggml_backend_rpc_device_get_name(ggml_backend_dev_t dev) { static const char * ggml_backend_rpc_device_get_description(ggml_backend_dev_t dev) { ggml_backend_rpc_device_context * ctx = (ggml_backend_rpc_device_context *)dev->context; - return ctx->name.c_str(); + return ctx->description.c_str(); } static void ggml_backend_rpc_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { ggml_backend_rpc_device_context * ctx = (ggml_backend_rpc_device_context *)dev->context; - ggml_backend_rpc_get_device_memory(ctx->endpoint.c_str(), free, total); - - GGML_UNUSED(dev); + ggml_backend_rpc_get_device_memory(ctx->endpoint.c_str(), ctx->device, free, total); } static enum ggml_backend_dev_type ggml_backend_rpc_device_get_type(ggml_backend_dev_t dev) { @@ -1710,7 +1830,7 @@ static void ggml_backend_rpc_device_get_props(ggml_backend_dev_t dev, struct ggm static ggml_backend_t ggml_backend_rpc_device_init(ggml_backend_dev_t dev, const char * params) { ggml_backend_rpc_device_context * ctx = (ggml_backend_rpc_device_context *)dev->context; - return ggml_backend_rpc_init(ctx->endpoint.c_str()); + return ggml_backend_rpc_init(ctx->endpoint.c_str(), ctx->device); GGML_UNUSED(params); } @@ -1718,7 +1838,7 @@ static ggml_backend_t ggml_backend_rpc_device_init(ggml_backend_dev_t dev, const static ggml_backend_buffer_type_t ggml_backend_rpc_device_get_buffer_type(ggml_backend_dev_t dev) { ggml_backend_rpc_device_context * ctx = (ggml_backend_rpc_device_context *)dev->context; - return ggml_backend_rpc_buffer_type(ctx->endpoint.c_str()); + return ggml_backend_rpc_buffer_type(ctx->endpoint.c_str(), ctx->device); GGML_UNUSED(dev); } @@ -1736,7 +1856,7 @@ static bool ggml_backend_rpc_device_supports_buft(ggml_backend_dev_t dev, ggml_b } ggml_backend_rpc_buffer_type_context * buft_ctx = (ggml_backend_rpc_buffer_type_context *)buft->context; ggml_backend_rpc_device_context * dev_ctx = (ggml_backend_rpc_device_context *)dev->context; - return buft_ctx->endpoint == dev_ctx->endpoint; + return buft_ctx->endpoint == dev_ctx->endpoint && buft_ctx->device == dev_ctx->device; } static const struct ggml_backend_device_i ggml_backend_rpc_device_i = { @@ -1759,28 +1879,34 @@ static const struct ggml_backend_device_i ggml_backend_rpc_device_i = { // backend reg interface -static const char * ggml_backend_rpc_reg_get_name(ggml_backend_reg_t reg) { - return "RPC"; +struct ggml_backend_rpc_reg_context { + std::string name; + std::vector devices; +}; - GGML_UNUSED(reg); +static const char * ggml_backend_rpc_reg_get_name(ggml_backend_reg_t reg) { + ggml_backend_rpc_reg_context * ctx = (ggml_backend_rpc_reg_context *)reg->context; + return ctx ? ctx->name.c_str() : "RPC"; } static size_t ggml_backend_rpc_reg_get_device_count(ggml_backend_reg_t reg) { - return 0; - - GGML_UNUSED(reg); + ggml_backend_rpc_reg_context * ctx = (ggml_backend_rpc_reg_context *)reg->context; + return ctx ? ctx->devices.size() : 0; } static ggml_backend_dev_t ggml_backend_rpc_reg_get_device(ggml_backend_reg_t reg, size_t index) { - GGML_ABORT("The RPC backend does not have enumerated devices - use ggml_backend_add_device instead"); - - GGML_UNUSED(reg); - GGML_UNUSED(index); + ggml_backend_rpc_reg_context * ctx = (ggml_backend_rpc_reg_context *)reg->context; + if (ctx == nullptr) { + GGML_ABORT("The RPC backend does not have enumerated devices - use ggml_backend_rpc_add_server instead"); + } else { + GGML_ASSERT(index < ctx->devices.size()); + return ctx->devices[index]; + } } static void * ggml_backend_rpc_get_proc_address(ggml_backend_reg_t reg, const char * name) { - if (std::strcmp(name, "ggml_backend_rpc_add_device") == 0) { - return (void *)ggml_backend_rpc_add_device; + if (std::strcmp(name, "ggml_backend_rpc_add_server") == 0) { + return (void *)ggml_backend_rpc_add_server; } if (std::strcmp(name, "ggml_backend_rpc_start_server") == 0) { return (void *)ggml_backend_rpc_start_server; @@ -1807,30 +1933,61 @@ ggml_backend_reg_t ggml_backend_rpc_reg(void) { return &ggml_backend_rpc_reg; } -ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint) { - static std::unordered_map dev_map; - - static std::mutex mutex; - std::lock_guard lock(mutex); - - if (dev_map.find(endpoint) != dev_map.end()) { - return dev_map[endpoint]; - } - - ggml_backend_rpc_device_context * ctx = new ggml_backend_rpc_device_context { - /* .endpoint = */ endpoint, - /* .name = */ "RPC[" + std::string(endpoint) + "]", - }; - - ggml_backend_dev_t dev = new ggml_backend_device { - /* .iface = */ ggml_backend_rpc_device_i, - /* .reg = */ ggml_backend_rpc_reg(), - /* .context = */ ctx, - }; - - dev_map[endpoint] = dev; - - return dev; +static uint32_t ggml_backend_rpc_get_device_count(const char * endpoint) { + auto sock = get_socket(endpoint); + rpc_msg_device_count_rsp response; + bool status = send_rpc_cmd(sock, RPC_CMD_DEVICE_COUNT, nullptr, 0, &response, sizeof(response)); + RPC_STATUS_ASSERT(status); + return response.device_count; } +static const ggml_backend_reg_i ggml_backend_rpc_reg_interface = { + /* .get_name = */ ggml_backend_rpc_reg_get_name, + /* .get_device_count = */ ggml_backend_rpc_reg_get_device_count, + /* .get_device = */ ggml_backend_rpc_reg_get_device, + /* .get_proc_address = */ ggml_backend_rpc_get_proc_address, +}; + +ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint) { + static std::unordered_map reg_map; + static std::mutex mutex; + static uint32_t dev_id = 0; + std::lock_guard lock(mutex); + if (reg_map.find(endpoint) != reg_map.end()) { + return reg_map[endpoint]; + } + uint32_t dev_count = ggml_backend_rpc_get_device_count(endpoint); + if (dev_count == 0) { + return nullptr; + } + ggml_backend_rpc_reg_context * ctx = new ggml_backend_rpc_reg_context; + ctx->name = "RPC[" + std::string(endpoint) + "]"; + for (uint32_t ind = 0; ind < dev_count; ind++) { + std::string dev_name = "RPC" + std::to_string(dev_id); + std::string dev_desc = std::string(endpoint); + ggml_backend_rpc_device_context * dev_ctx = new ggml_backend_rpc_device_context { + /* .endpoint = */ endpoint, + /* .device = */ ind, + /* .name = */ dev_name, + /* .description = */ dev_desc + }; + + ggml_backend_dev_t dev = new ggml_backend_device { + /* .iface = */ ggml_backend_rpc_device_i, + /* .reg = */ ggml_backend_rpc_reg(), + /* .context = */ dev_ctx, + }; + ctx->devices.push_back(dev); + dev_id++; + } + ggml_backend_reg_t reg = new ggml_backend_reg { + /* .api_version = */ GGML_BACKEND_API_VERSION, + /* .iface = */ ggml_backend_rpc_reg_interface, + /* .context = */ ctx + }; + reg_map[endpoint] = reg; + return reg; +} + + GGML_BACKEND_DL_IMPL(ggml_backend_rpc_reg) diff --git a/tools/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp index 275ba367c..89bc01b48 100644 --- a/tools/llama-bench/llama-bench.cpp +++ b/tools/llama-bench/llama-bench.cpp @@ -168,7 +168,7 @@ static std::vector parse_devices_arg(const std::string & val return devices; } -static std::vector register_rpc_device_list(const std::string & servers) { +static void register_rpc_server_list(const std::string & servers) { auto rpc_servers = string_split(servers, ','); if (rpc_servers.empty()) { throw std::invalid_argument("no RPC servers specified"); @@ -179,36 +179,15 @@ static std::vector register_rpc_device_list(const std::strin throw std::invalid_argument("failed to find RPC backend"); } - using add_rpc_device_fn = ggml_backend_dev_t (*)(const char * endpoint); - auto * ggml_backend_rpc_add_device_fn = (add_rpc_device_fn) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device"); - if (!ggml_backend_rpc_add_device_fn) { - throw std::invalid_argument("failed to find RPC device add function"); + using add_rpc_server_fn = ggml_backend_reg_t (*)(const char * endpoint); + auto * ggml_backend_rpc_add_server_fn = (add_rpc_server_fn) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server"); + if (!ggml_backend_rpc_add_server_fn) { + throw std::invalid_argument("failed to find RPC add server function"); } - - static std::unordered_set registered; - std::vector devices; for (const auto & server : rpc_servers) { - ggml_backend_dev_t dev = nullptr; - - std::string name = string_format("RPC[%s]", server.c_str()); - - if (registered.find(server) != registered.end()) { - dev = ggml_backend_dev_by_name(name.c_str()); - } - - if (!dev) { - dev = ggml_backend_rpc_add_device_fn(server.c_str()); - if (!dev) { - throw std::invalid_argument(string_format("failed to add RPC device for server '%s'", server.c_str())); - } - ggml_backend_device_register(dev); - registered.insert(server); - } - - devices.push_back(dev); + auto reg = ggml_backend_rpc_add_server_fn(server.c_str()); + ggml_backend_register(reg); } - - return devices; } static std::string devices_to_string(const std::vector & devices) { @@ -714,7 +693,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { break; } try { - register_rpc_device_list(argv[i]); + register_rpc_server_list(argv[i]); } catch (const std::exception & e) { fprintf(stderr, "error: %s\n", e.what()); invalid_param = true; @@ -1368,13 +1347,23 @@ struct test { static std::string get_backend() { std::vector backends; + bool rpc_used = false; for (size_t i = 0; i < ggml_backend_reg_count(); i++) { auto * reg = ggml_backend_reg_get(i); std::string name = ggml_backend_reg_name(reg); - if (name != "CPU") { - backends.push_back(ggml_backend_reg_name(reg)); + if (string_starts_with(name, "RPC")) { + if (ggml_backend_reg_dev_count(reg) > 0) { + rpc_used = true; + } + } else { + if (name != "CPU") { + backends.push_back(ggml_backend_reg_name(reg)); + } } } + if (rpc_used) { + backends.push_back("RPC"); + } return backends.empty() ? "CPU" : join(backends, ","); } diff --git a/tools/rpc/rpc-server.cpp b/tools/rpc/rpc-server.cpp index dc8e077f3..088515612 100644 --- a/tools/rpc/rpc-server.cpp +++ b/tools/rpc/rpc-server.cpp @@ -22,6 +22,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -131,24 +132,24 @@ static std::string fs_get_cache_directory() { } struct rpc_server_params { - std::string host = "127.0.0.1"; - int port = 50052; - size_t backend_mem = 0; - bool use_cache = false; - int n_threads = std::max(1U, std::thread::hardware_concurrency()/2); - std::string device; + std::string host = "127.0.0.1"; + int port = 50052; + bool use_cache = false; + int n_threads = std::max(1U, std::thread::hardware_concurrency()/2); + std::vector devices; + std::vector dev_mem; }; static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) { fprintf(stderr, "Usage: %s [options]\n\n", argv[0]); fprintf(stderr, "options:\n"); - fprintf(stderr, " -h, --help show this help message and exit\n"); - fprintf(stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n", params.n_threads); - fprintf(stderr, " -d DEV, --device device to use\n"); - fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str()); - fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port); - fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n"); - fprintf(stderr, " -c, --cache enable local file cache\n"); + fprintf(stderr, " -h, --help show this help message and exit\n"); + fprintf(stderr, " -t, --threads N number of threads for the CPU device (default: %d)\n", params.n_threads); + fprintf(stderr, " -d, --device comma-separated list of devices\n"); + fprintf(stderr, " -H, --host HOST host to bind to (default: %s)\n", params.host.c_str()); + fprintf(stderr, " -p, --port PORT port to bind to (default: %d)\n", params.port); + fprintf(stderr, " -m, --mem memory size for each device (in MB)\n"); + fprintf(stderr, " -c, --cache enable local file cache\n"); fprintf(stderr, "\n"); } @@ -174,17 +175,17 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & if (++i >= argc) { return false; } - params.device = argv[i]; - if (ggml_backend_dev_by_name(params.device.c_str()) == nullptr) { - fprintf(stderr, "error: unknown device: %s\n", params.device.c_str()); - fprintf(stderr, "available devices:\n"); - for (size_t i = 0; i < ggml_backend_dev_count(); i++) { - auto * dev = ggml_backend_dev_get(i); - size_t free, total; - ggml_backend_dev_memory(dev, &free, &total); - printf(" %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); + const std::regex regex{ R"([,/]+)" }; + std::string dev_str = argv[i]; + std::sregex_token_iterator iter(dev_str.begin(), dev_str.end(), regex, -1); + std::sregex_token_iterator end; + for ( ; iter != end; ++iter) { + try { + params.devices.push_back(*iter); + } catch (const std::exception & ) { + fprintf(stderr, "error: invalid device: %s\n", iter->str().c_str()); + return false; } - return false; } } else if (arg == "-p" || arg == "--port") { if (++i >= argc) { @@ -200,7 +201,19 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & if (++i >= argc) { return false; } - params.backend_mem = std::stoul(argv[i]) * 1024 * 1024; + const std::regex regex{ R"([,/]+)" }; + std::string mem_str = argv[i]; + std::sregex_token_iterator iter(mem_str.begin(), mem_str.end(), regex, -1); + std::sregex_token_iterator end; + for ( ; iter != end; ++iter) { + try { + size_t mem = std::stoul(*iter) * 1024 * 1024; + params.dev_mem.push_back(mem); + } catch (const std::exception & ) { + fprintf(stderr, "error: invalid memory size: %s\n", iter->str().c_str()); + return false; + } + } } else if (arg == "-h" || arg == "--help") { print_usage(argc, argv, params); exit(0); @@ -213,45 +226,46 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & return true; } -static ggml_backend_t create_backend(const rpc_server_params & params) { - ggml_backend_t backend = nullptr; +static std::vector get_devices(const rpc_server_params & params) { + std::vector devices; + if (!params.devices.empty()) { + for (auto device : params.devices) { + ggml_backend_dev_t dev = ggml_backend_dev_by_name(device.c_str()); + if (dev) { + devices.push_back(dev); + } else { + fprintf(stderr, "error: unknown device: %s\n", device.c_str()); + fprintf(stderr, "available devices:\n"); + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + auto * dev = ggml_backend_dev_get(i); + size_t free, total; + ggml_backend_dev_memory(dev, &free, &total); + printf(" %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); + } + return {}; + } + } + } - if (!params.device.empty()) { - ggml_backend_dev_t dev = ggml_backend_dev_by_name(params.device.c_str()); + // Try non-CPU devices first + if (devices.empty()) { + for (size_t i = 0; i < ggml_backend_dev_count(); i++) { + ggml_backend_dev_t dev = ggml_backend_dev_get(i); + if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) { + devices.push_back(dev); + } + } + } + + // If there are no accelerators, fallback to CPU device + if (devices.empty()) { + ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); if (dev) { - backend = ggml_backend_dev_init(dev, nullptr); - if (!backend) { - fprintf(stderr, "Failed to create backend for device %s\n", params.device.c_str()); - return nullptr; - } + devices.push_back(dev); } } - if (!backend) { - backend = ggml_backend_init_best(); - } - - if (backend) { - fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend)); - - // set the number of threads - ggml_backend_dev_t dev = ggml_backend_get_device(backend); - ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr; - if (reg) { - auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads"); - if (ggml_backend_set_n_threads_fn) { - ggml_backend_set_n_threads_fn(backend, params.n_threads); - } - } - } - - return backend; -} - -static void get_backend_memory(ggml_backend_t backend, size_t * free_mem, size_t * total_mem) { - ggml_backend_dev_t dev = ggml_backend_get_device(backend); - GGML_ASSERT(dev != nullptr); - ggml_backend_dev_memory(dev, free_mem, total_mem); + return devices; } int main(int argc, char * argv[]) { @@ -273,18 +287,23 @@ int main(int argc, char * argv[]) { fprintf(stderr, "\n"); } - ggml_backend_t backend = create_backend(params); - if (!backend) { - fprintf(stderr, "Failed to create backend\n"); + auto devices = get_devices(params); + if (devices.empty()) { + fprintf(stderr, "No devices found\n"); return 1; } std::string endpoint = params.host + ":" + std::to_string(params.port); - size_t free_mem, total_mem; - if (params.backend_mem > 0) { - free_mem = params.backend_mem; - total_mem = params.backend_mem; - } else { - get_backend_memory(backend, &free_mem, &total_mem); + std::vector free_mem, total_mem; + for (size_t i = 0; i < devices.size(); i++) { + if (i < params.dev_mem.size()) { + free_mem.push_back(params.dev_mem[i]); + total_mem.push_back(params.dev_mem[i]); + } else { + size_t free, total; + ggml_backend_dev_memory(devices[i], &free, &total); + free_mem.push_back(free); + total_mem.push_back(total); + } } const char * cache_dir = nullptr; std::string cache_dir_str; @@ -309,8 +328,7 @@ int main(int argc, char * argv[]) { return 1; } - start_server_fn(backend, endpoint.c_str(), cache_dir, free_mem, total_mem); - - ggml_backend_free(backend); + start_server_fn(endpoint.c_str(), cache_dir, params.n_threads, devices.size(), + devices.data(), free_mem.data(), total_mem.data()); return 0; } From f39283960b58a92ecc0c72567711318b20e22b55 Mon Sep 17 00:00:00 2001 From: Radoslav Gerganov Date: Sat, 4 Oct 2025 16:22:45 +0300 Subject: [PATCH 3/4] rpc : check src buffer when copying tensor (#16421) Only dst buffer is guaranteed to be an RPC buffer. Add check for the src one. --- ggml/src/ggml-rpc/ggml-rpc.cpp | 37 ++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 1a8739e78..aad48d62a 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -631,23 +631,30 @@ static void ggml_backend_rpc_buffer_get_tensor(ggml_backend_buffer_t buffer, con RPC_STATUS_ASSERT(status); } +static bool ggml_backend_buffer_is_rpc(ggml_backend_buffer_t buffer) { + return buffer->iface.free_buffer == ggml_backend_rpc_buffer_free_buffer; +} + static bool ggml_backend_rpc_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { - // check if src and dst are on the same server - ggml_backend_buffer_t src_buffer = src->buffer; - ggml_backend_rpc_buffer_context * src_ctx = (ggml_backend_rpc_buffer_context *)src_buffer->context; - ggml_backend_buffer_t dst_buffer = dst->buffer; - ggml_backend_rpc_buffer_context * dst_ctx = (ggml_backend_rpc_buffer_context *)dst_buffer->context; - if (src_ctx->sock != dst_ctx->sock) { - return false; + if (ggml_backend_buffer_is_rpc(src->buffer)) { + // check if src and dst are on the same server + ggml_backend_buffer_t src_buffer = src->buffer; + ggml_backend_rpc_buffer_context * src_ctx = (ggml_backend_rpc_buffer_context *)src_buffer->context; + ggml_backend_buffer_t dst_buffer = dst->buffer; + ggml_backend_rpc_buffer_context * dst_ctx = (ggml_backend_rpc_buffer_context *)dst_buffer->context; + if (src_ctx->sock != dst_ctx->sock) { + return false; + } + ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context; + rpc_msg_copy_tensor_req request; + request.src = serialize_tensor(src); + request.dst = serialize_tensor(dst); + rpc_msg_copy_tensor_rsp response; + bool status = send_rpc_cmd(ctx->sock, RPC_CMD_COPY_TENSOR, &request, sizeof(request), &response, sizeof(response)); + RPC_STATUS_ASSERT(status); + return response.result; } - ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context; - rpc_msg_copy_tensor_req request; - request.src = serialize_tensor(src); - request.dst = serialize_tensor(dst); - rpc_msg_copy_tensor_rsp response; - bool status = send_rpc_cmd(ctx->sock, RPC_CMD_COPY_TENSOR, &request, sizeof(request), &response, sizeof(response)); - RPC_STATUS_ASSERT(status); - return response.result; + return false; } static void ggml_backend_rpc_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { From 86df2c9ae4f2f1ee63d2558a9dc797b98524639b Mon Sep 17 00:00:00 2001 From: Eve <139727413+netrunnereve@users.noreply.github.com> Date: Sat, 4 Oct 2025 20:04:27 +0000 Subject: [PATCH 4/4] vulkan: use a more appropriate amount of threads when generating shaders (#16418) * use a more flexible amount of threads * fix windows compile and 0 thread case * nominmax --- ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp index e2726f1fa..f0cc24ff3 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp @@ -1,5 +1,3 @@ - - #include #include #include @@ -22,6 +20,7 @@ #include #ifdef _WIN32 + #define NOMINMAX #include #include // For _mkdir on Windows #else @@ -306,7 +305,7 @@ using compile_count_guard = std::unique_ptr guard(compile_count_mutex); compile_count_cond.wait(guard, [N] { return compile_count < N; }); compile_count++;