made shaders gen deterministic, update to c++17 (+4 squashed commit)

Squashed commit:

[7bb2441b] made shaders gen deterministic

[906e02af] Update c++ from 11 to 17 (#1263)

* Update c/c++ from 11 to 17

* Update CMakeLists.txt

only bump c++

[7ca430ed] C++17 ver

[b7dfb55d] give up and switch to c++17 (+1 squashed commits)

Squashed commits:

[96cfbc48] give up and switch to c++17 (+5 squashed commit)

Squashed commit:

[19ac7c26] Revert "fixed incorrect number of params"

This reverts commit 51388729bc4ffe51ab07ae02ce386219fb5e2876.

[45f730da] Revert "fix for c++17"

This reverts commit 050ba5f72b3358f958722addb9aaa77ff2e428ee.

[51388729] fixed incorrect number of params

[8f1ee54e] build latest vk shaders

[050ba5f7] fix for c++17
This commit is contained in:
Concedo 2024-12-13 17:55:16 +08:00
parent 46d76d913f
commit a63c2c914d
7 changed files with 91 additions and 105 deletions

View file

@ -59,7 +59,7 @@ option(LLAMA_OPENMP "llama: use OpenMP"
# Compile flags # Compile flags
# #
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true) set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true) set(CMAKE_C_STANDARD_REQUIRED true)
@ -490,28 +490,28 @@ add_library(common2
src/unicode.cpp src/unicode.cpp
src/unicode-data.cpp) src/unicode-data.cpp)
target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(common2 PUBLIC cxx_std_11) # don't bump target_compile_features(common2 PUBLIC cxx_std_17) # don't bump
target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(sdtype_adapter add_library(sdtype_adapter
otherarch/sdcpp/sdtype_adapter.cpp) otherarch/sdcpp/sdtype_adapter.cpp)
target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(sdtype_adapter PUBLIC cxx_std_11) # don't bump target_compile_features(sdtype_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(whisper_adapter add_library(whisper_adapter
otherarch/whispercpp/whisper_adapter.cpp) otherarch/whispercpp/whisper_adapter.cpp)
target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common) target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
target_compile_features(whisper_adapter PUBLIC cxx_std_11) # don't bump target_compile_features(whisper_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(gpttype_adapter add_library(gpttype_adapter
gpttype_adapter.cpp) gpttype_adapter.cpp)
target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump target_compile_features(gpttype_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
@ -519,23 +519,23 @@ if (LLAMA_CUBLAS)
set(TARGET koboldcpp_cublas) set(TARGET koboldcpp_cublas)
add_library(${TARGET} SHARED expose.cpp expose.h) add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "") set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas") set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS}) target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
target_compile_features(${TARGET} PRIVATE cxx_std_11) target_compile_features(${TARGET} PRIVATE cxx_std_17)
endif() endif()
if (LLAMA_HIPBLAS) if (LLAMA_HIPBLAS)
set(TARGET koboldcpp_hipblas) set(TARGET koboldcpp_hipblas)
add_library(${TARGET} SHARED expose.cpp expose.h) add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "") set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas") set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS}) target_link_libraries(${TARGET} PUBLIC Threads::Threads ggml ggml_v1 ggml_v2 ggml_v3 common2 gpttype_adapter whisper_adapter sdtype_adapter ${LLAMA_EXTRA_LIBS})
target_compile_features(${TARGET} PRIVATE cxx_std_11) target_compile_features(${TARGET} PRIVATE cxx_std_17)
endif() endif()

View file

@ -47,7 +47,7 @@ endif
# Compile flags # Compile flags
# #
# keep standard at C11 and C++11 # keep standard at C11 and C++17
CFLAGS = CFLAGS =
CXXFLAGS = CXXFLAGS =
ifdef KCPP_DEBUG ifdef KCPP_DEBUG
@ -55,7 +55,7 @@ ifdef KCPP_DEBUG
CXXFLAGS = -g -O0 CXXFLAGS = -g -O0
endif endif
CFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64 CFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64 CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_AARCH64
ifndef KCPP_DEBUG ifndef KCPP_DEBUG
CFLAGS += -DNDEBUG -s CFLAGS += -DNDEBUG -s
CXXFLAGS += -DNDEBUG -s CXXFLAGS += -DNDEBUG -s

View file

@ -64,7 +64,7 @@
#ifdef _WIN32 #ifdef _WIN32
using dl_handle = typename std::remove_pointer<HMODULE>::type; using dl_handle = std::remove_pointer_t<HMODULE>;
struct dl_handle_deleter { struct dl_handle_deleter {
void operator()(HMODULE handle) { void operator()(HMODULE handle) {
@ -452,75 +452,70 @@ static std::string backend_filename_suffix() {
static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) { static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent, const char * user_search_path) {
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths // enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
// TODO: search system paths // TODO: search system paths
std::string file_prefix = backend_filename_prefix() + name + "-";
std::vector<std::string> search_paths;
if (user_search_path == nullptr) {
search_paths.push_back("./");
search_paths.push_back(get_executable_path());
} else {
#if defined(_WIN32)
search_paths.push_back(std::string(user_search_path) + "\\");
#else
search_paths.push_back(std::string(user_search_path) + "/");
#endif
}
//not available as we don't want c++17 int best_score = 0;
printf("\nggml_backend_load_best NOT AVAILABLE!\n"); std::string best_path;
namespace fs = std::filesystem;
for (const auto & search_path : search_paths) {
if (!fs::exists(search_path)) {
continue;
}
for (const auto & entry : fs::directory_iterator(search_path)) {
if (entry.is_regular_file()) {
std::string filename = entry.path().filename().string();
std::string ext = entry.path().extension().string();
if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
if (!handle && !silent) {
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
}
if (handle) {
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
if (score_fn) {
int s = score_fn();
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
#endif
if (s > best_score) {
best_score = s;
best_path = entry.path().string();
}
} else {
if (!silent) {
GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
}
}
}
}
}
}
}
if (best_score == 0) {
// try to load the base backend
for (const auto & search_path : search_paths) {
std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
if (fs::exists(path)) {
return get_reg().load_backend(path.c_str(), silent);
}
}
return nullptr; return nullptr;
}
// std::string file_prefix = backend_filename_prefix() + name + "-"; return get_reg().load_backend(best_path.c_str(), silent);
// std::vector<std::string> search_paths;
// if (user_search_path == nullptr) {
// search_paths.push_back("./");
// search_paths.push_back(get_executable_path());
// } else {
// #if defined(_WIN32)
// search_paths.push_back(std::string(user_search_path) + "\\");
// #else
// search_paths.push_back(std::string(user_search_path) + "/");
// #endif
// }
// int best_score = 0;
// std::string best_path;
// namespace fs = std::filesystem;
// for (const auto & search_path : search_paths) {
// if (!fs::exists(search_path)) {
// continue;
// }
// for (const auto & entry : fs::directory_iterator(search_path)) {
// if (entry.is_regular_file()) {
// std::string filename = entry.path().filename().string();
// std::string ext = entry.path().extension().string();
// if (filename.find(file_prefix) == 0 && ext == backend_filename_suffix()) {
// dl_handle_ptr handle { dl_load_library(entry.path().c_str()) };
// if (!handle && !silent) {
// GGML_LOG_ERROR("%s: failed to load %s\n", __func__, entry.path().string().c_str());
// }
// if (handle) {
// auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
// if (score_fn) {
// int s = score_fn();
// #ifndef NDEBUG
// GGML_LOG_DEBUG("%s: %s score: %d\n", __func__, entry.path().string().c_str(), s);
// #endif
// if (s > best_score) {
// best_score = s;
// best_path = entry.path().string();
// }
// } else {
// if (!silent) {
// GGML_LOG_INFO("%s: failed to find ggml_backend_score in %s\n", __func__, entry.path().string().c_str());
// }
// }
// }
// }
// }
// }
// }
// if (best_score == 0) {
// // try to load the base backend
// for (const auto & search_path : search_paths) {
// std::string path = search_path + backend_filename_prefix() + name + backend_filename_suffix();
// if (fs::exists(path)) {
// return get_reg().load_backend(path.c_str(), silent);
// }
// }
// return nullptr;
// }
// return get_reg().load_backend(best_path.c_str(), silent);
} }
void ggml_backend_load_all() { void ggml_backend_load_all() {

View file

@ -3838,9 +3838,7 @@ static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_b
GGML_UNUSED(data_size); GGML_UNUSED(data_size);
} }
namespace ggml { namespace ggml::cpu::aarch64 {
namespace cpu {
namespace aarch64 { //ggml::cpu::aarch64
// repack // repack
template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS> template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS>
int repack(struct ggml_tensor *, const void *, size_t); int repack(struct ggml_tensor *, const void *, size_t);
@ -4156,8 +4154,6 @@ static const tensor_traits<block_q4_0, 8, 8> q4_0_8x8_q8_0;
// instance for IQ4 // instance for IQ4
static const tensor_traits<block_iq4_nl, 4, 4> iq4_nl_4x4_q8_0; static const tensor_traits<block_iq4_nl, 4, 4> iq4_nl_4x4_q8_0;
}
}
} // namespace ggml::cpu::aarch64 } // namespace ggml::cpu::aarch64
static void flag_aarch_prepacked_quant(int type) static void flag_aarch_prepacked_quant(int type)
@ -4260,9 +4256,7 @@ static size_t ggml_backend_cpu_aarch64_buffer_type_get_alignment(ggml_backend_bu
GGML_UNUSED(buft); GGML_UNUSED(buft);
} }
namespace ggml { namespace ggml::cpu::aarch64 {
namespace cpu {
namespace aarch64 { //ggml::cpu::aarch64
class extra_buffer_type : ggml::cpu::extra_buffer_type { class extra_buffer_type : ggml::cpu::extra_buffer_type {
bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override { bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
if ( op->op == GGML_OP_MUL_MAT && if ( op->op == GGML_OP_MUL_MAT &&
@ -4309,9 +4303,6 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
return nullptr; return nullptr;
} }
}; };
}
}
} // namespace ggml::cpu::aarch64 } // namespace ggml::cpu::aarch64
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) { ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) {

View file

@ -3,12 +3,10 @@
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
#include "ggml-backend.h" #include "ggml-backend.h"
namespace ggml { namespace ggml::cpu {
namespace cpu {
tensor_traits::~tensor_traits() {} tensor_traits::~tensor_traits() {}
extra_buffer_type::~extra_buffer_type() {} extra_buffer_type::~extra_buffer_type() {}
}
} // namespace ggml::cpu } // namespace ggml::cpu
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) { bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {

View file

@ -15,8 +15,7 @@ bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size
#ifdef __cplusplus #ifdef __cplusplus
} }
namespace ggml { namespace ggml::cpu {
namespace cpu {
// register in tensor->extra // register in tensor->extra
class tensor_traits { class tensor_traits {
public: public:
@ -31,7 +30,6 @@ class extra_buffer_type {
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0; virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0; virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
}; };
}
} // namespace ggml::cpu } // namespace ggml::cpu
// implemented in ggml-cpu.cpp. // implemented in ggml-cpu.cpp.

View file

@ -263,18 +263,22 @@ std::map<std::string, std::string> merge_maps(const std::map<std::string, std::s
} }
static std::vector<std::future<void>> compiles; static std::vector<std::future<void>> compiles;
// void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
// {
// // wait until fewer than N compiles are in progress.
// // 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
// uint32_t N = 16;
// std::unique_lock<std::mutex> guard(compile_count_mutex);
// while (compile_count >= N) {
// compile_count_cond.wait(guard);
// }
// compile_count++;
// }
// compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc));
// }
void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) { void string_to_spv(const std::string& _name, const std::string& in_fname, const std::map<std::string, std::string>& defines, bool fp16 = true, bool coopmat = false, bool coopmat2 = false, bool f16acc = false) {
{ std::cout << "string_to_spv: " << _name << "\n";
// wait until fewer than N compiles are in progress. string_to_spv_func(_name, in_fname, defines, fp16, coopmat, coopmat2, f16acc); //non async version
// 16 is an arbitrary limit, the goal is to avoid "failed to create pipe" errors.
uint32_t N = 16;
std::unique_lock<std::mutex> guard(compile_count_mutex);
while (compile_count >= N) {
compile_count_cond.wait(guard);
}
compile_count++;
}
compiles.push_back(std::async(string_to_spv_func, _name, in_fname, defines, fp16, coopmat, coopmat2, f16acc));
} }
void matmul_shaders(bool fp16, bool matmul_id, bool coopmat, bool coopmat2, bool f16acc) { void matmul_shaders(bool fp16, bool matmul_id, bool coopmat, bool coopmat2, bool f16acc) {