Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/nix/package.nix
#	.github/workflows/build.yml
#	.github/workflows/server.yml
#	CMakeLists.txt
#	Makefile
#	README.md
#	ggml-cuda.cu
#	tests/test-backend-ops.cpp
This commit is contained in:
Concedo 2024-05-19 17:55:20 +08:00
commit d5d5dda02b
34 changed files with 9731 additions and 4163 deletions

View file

@ -14,6 +14,12 @@
#include <stdlib.h> // for qsort
#include <stdio.h> // for GGML_ASSERT
// Lower bounds on a group's/block's maximum magnitude. In the quantization
// routines below, a group whose maximum falls under the applicable bound takes
// the "all zero" early-out path (zero scale / zeroed block) instead of going
// through the regular scale search. These replace the earlier exact-zero tests
// (`!max`, `!amax`) and the previous 1e-30f literal.
#define GROUP_MAX_EPS 1e-15f         // default bound: make_qx_quants, make_q3_quants, q6_K, iq2_xxs, iq2_xs, iq4_nl
#define GROUP_MAX_EPS_IQ3_XXS 1e-8f  // bound used by quantize_row_iq3_xxs_impl
#define GROUP_MAX_EPS_IQ2_S 1e-8f    // bound used by quantize_row_iq2_s_impl
#define GROUP_MAX_EPS_IQ1_M 1e-7f    // bound used by quantize_row_iq1_m_impl
#define GROUP_MAX_EPS_IQ1_S 1e-12f   // bound used by quantize_row_iq1_s_impl
#if defined(_MSC_VER)
// disable "possible loss of data" to avoid warnings for hundreds of casts
// we should just be careful :)
@ -1110,7 +1116,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
float ax = fabsf(x[i]);
if (ax > amax) { amax = ax; max = x[i]; }
}
if (amax < 1e-30f) { // all zero
if (amax < GROUP_MAX_EPS) { // all zero
for (int i = 0; i < n; ++i) {
L[i] = 0;
}
@ -1178,7 +1184,7 @@ static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t *
float ax = fabsf(x[i]);
if (ax > amax) { amax = ax; max = x[i]; }
}
if (!amax) { // all zero
if (amax < GROUP_MAX_EPS) { // all zero
for (int i = 0; i < n; ++i) { L[i] = 0; }
return 0.f;
}
@ -1647,7 +1653,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t *
break;
}
}
return sumlx / suml2;
return sumlx/suml2;
}
static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restrict y, int k, const float * restrict quant_weights) {
@ -2654,7 +2660,7 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
}
if (!max_abs_scale) {
if (max_abs_scale < GROUP_MAX_EPS) {
memset(&y[i], 0, sizeof(block_q6_K));
y[i].d = GGML_FP32_TO_FP16(0.f);
x += QK_K;
@ -2806,7 +2812,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
}
if (!max_abs_scale) {
if (max_abs_scale < GROUP_MAX_EPS) {
memset(&y[i], 0, sizeof(block_q6_K));
y[i].d = GGML_FP32_TO_FP16(0.f);
x += QK_K;
@ -12600,7 +12606,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
}
float max = xval[0];
for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
if (!max) {
if (max < GROUP_MAX_EPS) {
scales[ib] = 0;
memset(L, 0, 32);
continue;
@ -12776,7 +12782,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
}
float max = xval[0];
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
if (!max) {
if (max < GROUP_MAX_EPS) {
scales[ib] = 0;
memset(L, 0, 16);
continue;
@ -13217,7 +13223,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
}
float max = xval[0];
for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
if (!max) {
if (max < GROUP_MAX_EPS_IQ3_XXS) {
scales[ib] = 0;
memset(L, 0, 32);
continue;
@ -13757,7 +13763,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
float max = fabsf(xb[0]);
for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
if (!max) {
if (max < GROUP_MAX_EPS_IQ1_S) {
scales[ib] = 0;
memset(L, 1, block_size);
continue;
@ -13945,7 +13951,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
}
float max = fabsf(xb[0]);
for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
if (!max) {
if (max < GROUP_MAX_EPS_IQ1_M) {
scales[ib] = 0;
memset(L, 1, block_size);
continue;
@ -14209,7 +14215,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
amax = ax; max = xb[j];
}
}
if (!amax) {
if (amax < GROUP_MAX_EPS) {
scales[ib] = 0;
continue;
}
@ -14430,7 +14436,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
}
float max = xval[0];
for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
if (!max) {
if (max < GROUP_MAX_EPS_IQ2_S) {
scales[ib] = 0;
continue;
}