mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
Up-to-date merge, without vulkan-gen-shaders. The shaders will be built before each release from now on, as they are very large.
This commit is contained in:
commit
ed75f8a741
24 changed files with 655 additions and 573 deletions
|
@ -129,8 +129,7 @@ struct ggml_arm_arch_features_type {
|
|||
#endif
|
||||
#include <windows.h>
|
||||
|
||||
|
||||
#if !defined(__clang__)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#define GGML_CACHE_ALIGN __declspec(align(GGML_CACHE_LINE))
|
||||
|
||||
typedef volatile LONG atomic_int;
|
||||
|
@ -458,21 +457,21 @@ const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type
|
|||
#define GGML_F32x4_ADD vaddq_f32
|
||||
#define GGML_F32x4_MUL vmulq_f32
|
||||
#define GGML_F32x4_REDUCE_ONE(x) vaddvq_f32(x)
|
||||
#define GGML_F32x4_REDUCE(res, x) \
|
||||
{ \
|
||||
int offset = GGML_F32_ARR >> 1; \
|
||||
for (int i = 0; i < offset; ++i) { \
|
||||
(x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
|
||||
} \
|
||||
offset >>= 1; \
|
||||
for (int i = 0; i < offset; ++i) { \
|
||||
(x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
|
||||
} \
|
||||
offset >>= 1; \
|
||||
for (int i = 0; i < offset; ++i) { \
|
||||
(x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
|
||||
} \
|
||||
(res) = GGML_F32x4_REDUCE_ONE((x)[0]); \
|
||||
#define GGML_F32x4_REDUCE(res, x) \
|
||||
{ \
|
||||
int offset = GGML_F32_ARR >> 1; \
|
||||
for (int i = 0; i < offset; ++i) { \
|
||||
(x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
|
||||
} \
|
||||
offset >>= 1; \
|
||||
for (int i = 0; i < offset; ++i) { \
|
||||
(x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
|
||||
} \
|
||||
offset >>= 1; \
|
||||
for (int i = 0; i < offset; ++i) { \
|
||||
(x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
|
||||
} \
|
||||
(res) = (ggml_float) GGML_F32x4_REDUCE_ONE((x)[0]); \
|
||||
}
|
||||
|
||||
#define GGML_F32_VEC GGML_F32x4
|
||||
|
@ -2400,7 +2399,7 @@ static void ggml_init_arm_arch_features(void) {
|
|||
uint32_t hwcap2 = getauxval(AT_HWCAP2);
|
||||
|
||||
ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
|
||||
ggml_arm_arch_features.has_dotprod = !!(hwcap && HWCAP_ASIMDDP);
|
||||
ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
|
||||
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
|
||||
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
|
||||
|
||||
|
@ -12982,7 +12981,7 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data);
|
|||
#include "windows.h"
|
||||
|
||||
// TODO: support > 64 CPUs
|
||||
bool ggml_thread_apply_affinity(bool * mask) {
|
||||
static bool ggml_thread_apply_affinity(bool * mask) {
|
||||
HANDLE h = GetCurrentThread();
|
||||
uint64_t bitmask = 0ULL;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue