made shaders gen deterministic, update to c++17 (+4 squashed commit)

Squashed commit: [7bb2441b] made shaders gen deterministic [906e02af] Update c++ from 11 to 17 (#1263) * Update c/c++ from 11 to 17 * Update CMakeLists.txt only bump c++ [7ca430ed] C++17 ver [b7dfb55d] give up and switch to c++17 (+1 squashed commits) Squashed commits: [96cfbc48] give up and switch to c++17 (+5 squashed commit) Squashed commit: [19ac7c26] Revert "fixed incorrect number of params" This reverts commit 51388729bc4ffe51ab07ae02ce386219fb5e2876. [45f730da] Revert "fix for c++17" This reverts commit 050ba5f72b3358f958722addb9aaa77ff2e428ee. [51388729] fixed incorrect number of params [8f1ee54e] build latest vk shaders [050ba5f7] fix for c++17
2025-09-11 09:34:37 +00:00 · 2024-12-13 17:55:16 +08:00 · 2024-12-13 17:55:16 +08:00 · a63c2c914d
commit a63c2c914d
parent 46d76d913f
7 changed files with 91 additions and 105 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -59,7 +59,7 @@ option(LLAMA_OPENMP                          "llama: use OpenMP"
 # Compile flags
 #
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED true)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED true)
@ -490,28 +490,28 @@ add_library(common2
            src/unicode.cpp
            src/unicode-data.cpp)
 target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
+target_compile_features(common2 PUBLIC cxx_std_17) # don't bump
 target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
 add_library(sdtype_adapter
            otherarch/sdcpp/sdtype_adapter.cpp)
 target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(sdtype_adapter PUBLIC cxx_std_11) # don't bump
+target_compile_features(sdtype_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 add_library(whisper_adapter
            otherarch/whispercpp/whisper_adapter.cpp)
 target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
-target_compile_features(whisper_adapter PUBLIC cxx_std_11) # don't bump
+target_compile_features(whisper_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 add_library(gpttype_adapter
            gpttype_adapter.cpp)
 target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
+target_compile_features(gpttype_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
@ -519,23 +519,23 @@ if (LLAMA_CUBLAS)
    set(TARGET koboldcpp_cublas)
    add_library(${TARGET} SHARED expose.cpp expose.h)
    target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-    target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
+    target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
    set_target_properties(${TARGET} PROPERTIES PREFIX "")
    set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
-    target_compile_features(${TARGET} PRIVATE cxx_std_11)
+    target_compile_features(${TARGET} PRIVATE cxx_std_17)
 endif()
 if (LLAMA_HIPBLAS)
    set(TARGET koboldcpp_hipblas)
    add_library(${TARGET} SHARED expose.cpp expose.h)
    target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
-    target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
+    target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
    set_target_properties(${TARGET} PROPERTIES PREFIX "")
    set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
    set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
-    target_compile_features(${TARGET} PRIVATE cxx_std_11)
+    target_compile_features(${TARGET} PRIVATE cxx_std_17)
 endif()
--- a/4
+++ b/4
@ -47,7 +47,7 @@ endif
 # Compile flags
 #
-# keep standard at C11 and C++11
+# keep standard at C11 and C++17
 CFLAGS =
 CXXFLAGS =
 ifdef KCPP_DEBUG
@ -55,7 +55,7 @@ ifdef KCPP_DEBUG
 	CXXFLAGS = -g -O0
 endif
 CFLAGS   += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
-CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
+CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
 ifndef KCPP_DEBUG
 	CFLAGS += -DNDEBUG -s
 	CXXFLAGS += -DNDEBUG -s
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@ -64,7 +64,7 @@
 #ifdef _WIN32
-using dl_handle = typename std::remove_pointer<HMODULE>::type;
+using dl_handle = std::remove_pointer_t<HMODULE>;
 struct dl_handle_deleter {
    void operator()(HMODULE handle) {
@ -452,75 +452,70 @@ static std::string backend_filename_suffix() {
 static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
    // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
     // TODO: search system paths
    std::string file_prefix = backend_filename_prefix() + name + "-";
    std::vector<std::string> search_paths;
    if (user_search_path == nullptr) {
        search_paths.push_back("./");
        search_paths.push_back(get_executable_path());
    } else {
 #if defined(_WIN32)
        search_paths.push_back(std::string(user_search_path) + "\\");
 #else
        search_paths.push_back(std::string(user_search_path) + "/");
 #endif
    }
-    //not available as we don't want c++17
+    int best_score = 0;
-    printf("\nggml_backend_load_best NOT AVAILABLE!\n");
+    std::string best_path;
    namespace fs = std::filesystem;
    for (const auto & search_path : search_paths) {
        if (!fs::exists(search_path)) {
            continue;
        }
        for (const auto & entry : fs::directory_iterator(search_path)) {
            if (entry.is_regular_file()) {
                std::string filename = entry.path().filename().string();
                std::string ext = entry.path().extension().string();
                if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
                    dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
                    if (!handle && !silent) {
                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
                    }
                    if (handle) {
                        auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
                        if (score_fn) {
                            int s = score_fn();
 #ifndef NDEBUG
                            GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
 #endif
                            if (s > best_score) {
                                best_score = s;
                                best_path = entry.path().string();
                            }
                        } else {
                            if (!silent) {
                                GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
                            }
                        }
                    }
                }
            }
        }
    }
    if (best_score == 0) {
        // try to load the base backend
        for (const auto & search_path : search_paths) {
            std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
            if (fs::exists(path)) {
                return get_reg().load_backend(path.c_str(), silent);
            }
        }
        return nullptr;
    }
-//     std::string file_prefix = backend_filename_prefix() + name + "-";
+    return get_reg().load_backend(best_path.c_str(), silent);
 //     std::vector<std::string> search_paths;
 //     if (user_search_path == nullptr) {
 //         search_paths.push_back("./");
 //         search_paths.push_back(get_executable_path());
 //     } else {
 // #if defined(_WIN32)
 //         search_paths.push_back(std::string(user_search_path) + "\\");
 // #else
 //         search_paths.push_back(std::string(user_search_path) + "/");
 // #endif
 //     }
 //     int best_score = 0;
 //     std::string best_path;
 //     namespace fs = std::filesystem;
 //     for (const auto & search_path : search_paths) {
 //         if (!fs::exists(search_path)) {
 //             continue;
 //         }
 //         for (const auto & entry : fs::directory_iterator(search_path)) {
 //             if (entry.is_regular_file()) {
 //                 std::string filename = entry.path().filename().string();
 //                 std::string ext = entry.path().extension().string();
 //                 if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
 //                     dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
 //                     if (!handle && !silent) {
 //                         GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
 //                     }
 //                     if (handle) {
 //                         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
 //                         if (score_fn) {
 //                             int s = score_fn();
 // #ifndef NDEBUG
 //                             GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
 // #endif
 //                             if (s > best_score) {
 //                                 best_score = s;
 //                                 best_path = entry.path().string();
 //                             }
 //                         } else {
 //                             if (!silent) {
 //                                 GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
 //                             }
 //                         }
 //                     }
 //                 }
 //             }
 //         }
 //     }
 //     if (best_score == 0) {
 //         // try to load the base backend
 //         for (const auto & search_path : search_paths) {
 //             std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
 //             if (fs::exists(path)) {
 //                 return get_reg().load_backend(path.c_str(), silent);
 //             }
 //         }
 //         return nullptr;
 //     }
 //     return get_reg().load_backend(best_path.c_str(), silent);
 }
 void ggml_backend_load_all() {
--- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
+++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
@ -3838,9 +3838,7 @@ static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_b
    GGML_UNUSED(data_size);
 }
-namespace ggml {
+namespace ggml::cpu::aarch64 {
 namespace cpu {
 namespace aarch64 { //ggml::cpu::aarch64
 // repack
 template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS>
 int repack(struct ggml_tensor *, const void *, size_t);
@ -4156,8 +4154,6 @@ static const tensor_traits<block_q4_0, 8, 8> q4_0_8x8_q8_0;
 // instance for IQ4
 static const tensor_traits<block_iq4_nl, 4, 4> iq4_nl_4x4_q8_0;
 }
 }
 }  // namespace ggml::cpu::aarch64
 static void flag_aarch_prepacked_quant(int type)
@ -4260,9 +4256,7 @@ static size_t ggml_backend_cpu_aarch64_buffer_type_get_alignment(ggml_backend_bu
    GGML_UNUSED(buft);
 }
-namespace ggml {
+namespace ggml::cpu::aarch64 {
 namespace cpu {
 namespace aarch64 { //ggml::cpu::aarch64
 class extra_buffer_type : ggml::cpu::extra_buffer_type {
    bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
        if (    op->op == GGML_OP_MUL_MAT &&
@ -4309,9 +4303,6 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
        return nullptr;
    }
 };
 }
 }
 }  // namespace ggml::cpu::aarch64
 ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) {
--- a/ggml/src/ggml-cpu/ggml-cpu-traits.cpp
+++ b/ggml/src/ggml-cpu/ggml-cpu-traits.cpp
@ -3,12 +3,10 @@
 #include "ggml-backend-impl.h"
 #include "ggml-backend.h"
-namespace ggml {
+namespace ggml::cpu {
 namespace cpu {
 tensor_traits::~tensor_traits() {}
 extra_buffer_type::~extra_buffer_type() {}
 }
 }  // namespace ggml::cpu
 bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {
--- a/ggml/src/ggml-cpu/ggml-cpu-traits.h
+++ b/ggml/src/ggml-cpu/ggml-cpu-traits.h
@ -15,8 +15,7 @@ bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size
 #ifdef __cplusplus
 }
-namespace ggml {
+namespace ggml::cpu {
 namespace cpu {
 // register in tensor->extra
 class tensor_traits {
  public:
@ -31,7 +30,6 @@ class extra_buffer_type {
    virtual bool            supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
    virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op)                   = 0;
 };
 }
 }  // namespace ggml::cpu
 // implemented in ggml-cpu.cpp.
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@ -263,18 +263,22 @@ std::map<std::string, std::string> merge_maps(const std::map<std::string, std::s
 }
 static std::vector<std::future<void>> compiles;
 // void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
 //     {
 //         // wait until fewer than N compiles are in progress.
 //         // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
 //         uint32_t N = 16;
 //         std::unique_lock<std::mutex> guard(compile_count_mutex);
 //         while (compile_count >= N) {
 //             compile_count_cond.wait(guard);
 //         }
 //         compile_count++;
 //     }
 //     compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc));
 // }
 void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
-    {
+     std::cout << "string_to_spv: " << _name << "\n";
-        // wait until fewer than N compiles are in progress.
+     string_to_spv_func(_name, in_fname, defines, fp16, coopmat, coopmat2, f16acc); //non async version
        // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
        uint32_t N = 16;
        std::unique_lock<std::mutex> guard(compile_count_mutex);
        while (compile_count >= N) {
            compile_count_cond.wait(guard);
        }
        compile_count++;
    }
    compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc));
 }
 void matmul_shaders(bool fp16, bool matmul_id, bool coopmat, bool coopmat2, bool f16acc) {