doing what i must because i can, after the mess that is https://github.com/ggml-org/llama.cpp/pull/13892

There is so much duplicated code across the CPU arch directories; I expect upstream will prune it eventually.
Arch detection has no fallback when none of the known architectures is detected; by rights we should define GGML_CPU_GENERIC in that case.
I should be relaxing, it's the weekend.
This commit is contained in:
Concedo 2025-06-14 01:41:16 +08:00
parent f50c793140
commit 33809c9e82
4 changed files with 64 additions and 11 deletions

View file

@ -379,6 +379,8 @@ add_library(ggml
ggml/src/ggml-cpu/traits.h
ggml/src/ggml-threading.cpp
ggml/src/ggml-cpu/ggml-cpu.cpp
ggml/src/ggml-cpu/kcpp-quantmapper.c
ggml/src/ggml-cpu/kcpp-repackmapper.cpp
ggml/src/ggml-cpu/repack.cpp
ggml/src/ggml-cpu/repack.h
ggml/src/ggml-cpu/quants.c

View file

@ -90,10 +90,10 @@ endif
CUBLASLD_FLAGS =
CUBLAS_OBJS =
OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-repack.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o sampling.o kcpputils.o
OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-repack_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx2.o common.o sampling.o kcpputils.o
OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-repack_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx1.o common.o sampling.o kcpputils.o
OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-repack_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_failsafe.o common.o sampling.o kcpputils.o
OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o kcpp-quantmapper.o ggml-repack.o kcpp-repackmapper.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o sampling.o kcpputils.o
OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants.o kcpp-quantmapper_noavx2.o ggml-repack.o kcpp-repackmapper_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx2.o common.o sampling.o kcpputils.o
OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants.o kcpp-quantmapper_noavx1.o ggml-repack.o kcpp-repackmapper_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx1.o common.o sampling.o kcpputils.o
OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants.o kcpp-quantmapper_failsafe.o ggml-repack.o kcpp-repackmapper_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_failsafe.o common.o sampling.o kcpputils.o
# OS specific
ifeq ($(UNAME_S),Linux)
@ -507,23 +507,29 @@ ggml-quants_noavx1.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-q
$(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
ggml-quants_failsafe.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
#cpu quants
ggml-cpu-quants.o: ggml/src/ggml-cpu/quants.c ggml/include/ggml.h ggml/src/ggml-cpu/quants.h ggml/src/ggml-common.h
$(CC) $(CFLAGS) -c $< -o $@
kcpp-quantmapper.o: ggml/src/ggml-cpu/kcpp-quantmapper.c
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
ggml-cpu-quants_noavx2.o: ggml/src/ggml-cpu/quants.c ggml/include/ggml.h ggml/src/ggml-cpu/quants.h ggml/src/ggml-common.h
kcpp-quantmapper_noavx2.o: ggml/src/ggml-cpu/kcpp-quantmapper.c
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
ggml-cpu-quants_noavx1.o: ggml/src/ggml-cpu/quants.c ggml/include/ggml.h ggml/src/ggml-cpu/quants.h ggml/src/ggml-common.h
kcpp-quantmapper_noavx1.o: ggml/src/ggml-cpu/kcpp-quantmapper.c
$(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
ggml-cpu-quants_failsafe.o: ggml/src/ggml-cpu/quants.c ggml/include/ggml.h ggml/src/ggml-cpu/quants.h ggml/src/ggml-common.h
kcpp-quantmapper_failsafe.o: ggml/src/ggml-cpu/kcpp-quantmapper.c
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
#aarch64 repack
ggml-cpu-repack.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
ggml-repack.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
$(CXX) $(CXXFLAGS) -c $< -o $@
kcpp-repackmapper.o: ggml/src/ggml-cpu/kcpp-repackmapper.cpp
$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
ggml-cpu-repack_noavx2.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
kcpp-repackmapper_noavx2.o: ggml/src/ggml-cpu/kcpp-repackmapper.cpp
$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
ggml-cpu-repack_noavx1.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
kcpp-repackmapper_noavx1.o: ggml/src/ggml-cpu/kcpp-repackmapper.cpp
$(CXX) $(CXXFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
ggml-cpu-repack_failsafe.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
kcpp-repackmapper_failsafe.o: ggml/src/ggml-cpu/kcpp-repackmapper.cpp
$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@
#sgemm

View file

@ -0,0 +1,24 @@
// KoboldCpp quant-kernel mapper.
//
// Upstream llama.cpp (LCPP) split the formerly all-in-one CPU quant handling into
// one source file per architecture. This file decides at compile time which of
// those sub-files to pull in for kcpp, and textually #includes exactly one of them.
//
// The checks run top to bottom and the first match wins:
//   x86/x64 > ARM/AArch64 > PowerPC > LoongArch > RISC-V (64-bit only) > s390x.
// Every branch emits a #pragma message so the build log shows which kernels were chosen.
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) || defined(__amd64__) || defined(__AMD64__)
#pragma message("KoboldCpp Compiling Quants for x86/x64")
#include "arch/x86/quants.c"
#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) || defined(__arm64__) || defined(__ARM64__)
#pragma message("KoboldCpp Compiling Quants for ARM")
#include "arch/arm/quants.c"
#elif defined(__powerpc64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__ppc64le__)
#pragma message("KoboldCpp Compiling Quants for PowerPC")
#include "arch/powerpc/quants.c"
#elif defined(__loongarch__) || defined(__loongarch64)
#pragma message("KoboldCpp Compiling Quants for LoongArch")
#include "arch/loongarch/quants.c"
// Only 64-bit RISC-V is matched; other __riscv_xlen values fall through to the next checks.
#elif defined(__riscv) && (__riscv_xlen == 64)
#pragma message("KoboldCpp Compiling Quants for RISCV")
#include "arch/riscv/quants.c"
#elif defined(__s390x__)
#pragma message("KoboldCpp Compiling Quants for S390X")
#include "arch/s390/quants.c"
#else
// No known architecture matched: nothing architecture-specific is included and
// the build log gets the message below.
#pragma message("KoboldCpp Cannot Compile Quants! Unknown Architecture!")
// ISO C requires a translation unit to contain at least one declaration. Without
// this typedef the file would preprocess to nothing on this path, which pedantic
// compilers diagnose ("ISO C forbids an empty translation unit"). The typedef is
// never used and generates no code.
typedef int kcpp_quantmapper_no_arch_kernels;
#endif

View file

@ -0,0 +1,21 @@
// KoboldCpp repack mapper.
//
// Compile-time selector for the architecture-specific repack implementation.
// Exactly one branch below is taken (first match wins, in the order
// x86/x64 > ARM/AArch64 > PowerPC > LoongArch > 64-bit RISC-V > s390x), and each
// branch reports its choice in the build log through #pragma message.
//
// Only the x86, ARM and RISC-V branches include a source file. The PowerPC,
// LoongArch and s390x branches include nothing -- presumably no arch-specific
// repack source exists for those targets; verify against the arch/ directory.
#if defined(__x86_64__) || defined(_M_X64) || \
    defined(__i386__) || defined(_M_IX86) || \
    defined(__amd64__) || defined(__AMD64__)
#   pragma message("KoboldCpp Compiling Repack for x86/x64")
#   include "arch/x86/repack.cpp"
#elif defined(__aarch64__) || defined(_M_ARM64) || \
      defined(__arm__) || defined(_M_ARM) || \
      defined(__arm64__) || defined(__ARM64__)
#   pragma message("KoboldCpp Compiling Repack for ARM")
#   include "arch/arm/repack.cpp"
#elif defined(__powerpc64__) || defined(__powerpc__) || \
      defined(__ppc__) || defined(__PPC__) || \
      defined(__ppc64le__)
    // Message only: no repack source is included for PowerPC.
#   pragma message("KoboldCpp Compiling Repack for PowerPC")
#elif defined(__loongarch__) || defined(__loongarch64)
    // Message only: no repack source is included for LoongArch.
#   pragma message("KoboldCpp Compiling Repack for LoongArch")
#elif defined(__riscv) && (__riscv_xlen == 64)
    // Only 64-bit RISC-V is matched here.
#   pragma message("KoboldCpp Compiling Repack for RISCV")
#   include "arch/riscv/repack.cpp"
#elif defined(__s390x__)
    // Message only: no repack source is included for s390x.
#   pragma message("KoboldCpp Compiling Repack for S390X")
#else
    // No known architecture matched; nothing is included.
#   pragma message("KoboldCpp Cannot Compile Repack! Unknown Architecture!")
#endif