Merge branch 'master' into concedo_experimental

This commit is contained in:
Concedo 2023-05-21 00:30:06 +08:00
commit d1824f1e88
3 changed files with 55 additions and 23 deletions

6
ggml.c
View file

@@ -512,7 +512,7 @@ static inline int hsum_i32_4(const __m128i a) {
return _mm_cvtsi128_si32(_mm_add_epi32(sum64, hi32));
}
#if __AVX2__ || __AVX512F__
#if defined(__AVX2__) || defined(__AVX512F__)
// spread 32 bits to 32 bytes { 0x00, 0xFF }
static inline __m256i bytes_from_bits_32(const uint8_t * x) {
uint32_t x32;
@@ -688,7 +688,7 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128
#endif // __AVX__ || __AVX2__ || __AVX512F__
#endif // defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)
#if __ARM_NEON
#if defined(__ARM_NEON)
#if !defined(__aarch64__)
@@ -2481,7 +2481,7 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
}
sumf += (GGML_FP16_TO_FP32(x[i]).d*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
}
*s = sumf;