Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	README.md
#	examples/llama.android/llama/src/main/cpp/llama-android.cpp
#	examples/run/run.cpp
#	examples/server/README.md
#	examples/server/bench/README.md
#	examples/server/tests/README.md
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-cpu/CMakeLists.txt
#	tests/test-backend-ops.cpp
This commit is contained in:
Concedo 2025-01-03 11:56:20 +08:00
commit 911da8765f
46 changed files with 82848 additions and 73880 deletions

View file

@ -209,9 +209,12 @@ static inline __m256i sum_i16_pairs_int32x8(const __m256i x) {
}
static inline __m256i mul_sum_us8_pairs_int32x8(const __m256i ax, const __m256i sy) {
#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__))
#if defined(__AVX512VNNI__) && defined(__AVX512VL__)
const __m256i zero = _mm256_setzero_si256();
return _mm256_dpbusd_epi32(zero, ax, sy);
#elif defined(__AVXVNNI__)
const __m256i zero = _mm256_setzero_si256();
return _mm256_dpbusd_avx_epi32(zero, ax, sy);
#else
// Perform multiplication and create 16-bit values
const __m256i dot = _mm256_maddubs_epi16(ax, sy);