mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-16 11:59:42 +00:00
Try to make SD.CPP work by reverting ggml_vec_silu_f32 to the precomputed SiLU table
This commit is contained in:
parent
dd59303ae1
commit
b5401a2901
1 changed file with 13 additions and 0 deletions
13
ggml.c
13
ggml.c
|
@ -315,6 +315,9 @@ static ggml_fp16_t ggml_table_gelu_f16[1 << 16];
|
|||
// precomputed quick gelu table for f16 (128 KB)
|
||||
static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
|
||||
|
||||
// precomputed silu table for f16 (128 KB)
|
||||
static ggml_fp16_t ggml_table_silu_f16[1 << 16];
|
||||
|
||||
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
||||
float ggml_table_f32_f16[1 << 16];
|
||||
|
||||
|
@ -2457,6 +2460,15 @@ inline static __m128 ggml_v_silu(__m128 x) {
|
|||
|
||||
static void ggml_vec_silu_f32(const int n, float * y, const float * x) {
|
||||
int i = 0;
|
||||
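#if TRUE // TODO: temporarily reverts to the precomputed f16 SiLU table, which works for stable-diffusion.cpp on CPU. NOTE(review): TRUE is not a standard C macro; if it is undefined here the preprocessor treats it as 0 and this block is skipped - confirm it is defined on all targets.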
|
||||
uint16_t t;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
||||
memcpy(&t, &fp16, sizeof(uint16_t));
|
||||
y[i] = GGML_FP16_TO_FP32(ggml_table_silu_f16[t]);
|
||||
}
|
||||
return;
|
||||
#endif
|
||||
#if defined(__AVX512F__) && defined(__AVX512DQ__)
|
||||
for (; i + 15 < n; i += 16) {
|
||||
_mm512_storeu_ps(y + i, ggml_v_silu(_mm512_loadu_ps(x + i)));
|
||||
|
@ -3332,6 +3344,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||
float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
|
||||
ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
|
||||
ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
|
||||
ggml_table_silu_f16[i] = GGML_FP32_TO_FP16(ggml_silu_f32(f));
|
||||
}
|
||||
|
||||
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue