mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
doing what i must because i can, after the mess that is https://github.com/ggml-org/llama.cpp/pull/13892
There is so much duplicate code in each CPU arch; I expect upstream will prune it eventually. Arch detection has no fallback if none of the arches are found; by rights we should set GGML_CPU_GENERIC in that case. I should be relaxing, it's the weekend.
This commit is contained in:
parent
f50c793140
commit
33809c9e82
4 changed files with 64 additions and 11 deletions
|
|
@ -379,6 +379,8 @@ add_library(ggml
|
|||
ggml/src/ggml-cpu/traits.h
|
||||
ggml/src/ggml-threading.cpp
|
||||
ggml/src/ggml-cpu/ggml-cpu.cpp
|
||||
ggml/src/ggml-cpu/kcpp-quantmapper.c
|
||||
ggml/src/ggml-cpu/kcpp-repackmapper.cpp
|
||||
ggml/src/ggml-cpu/repack.cpp
|
||||
ggml/src/ggml-cpu/repack.h
|
||||
ggml/src/ggml-cpu/quants.c
|
||||
|
|
|
|||
28
Makefile
28
Makefile
|
|
@ -90,10 +90,10 @@ endif
|
|||
CUBLASLD_FLAGS =
|
||||
CUBLAS_OBJS =
|
||||
|
||||
OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-repack.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o sampling.o kcpputils.o
|
||||
OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-repack_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx2.o common.o sampling.o kcpputils.o
|
||||
OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-repack_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx1.o common.o sampling.o kcpputils.o
|
||||
OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-repack_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_failsafe.o common.o sampling.o kcpputils.o
|
||||
OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o kcpp-quantmapper.o ggml-repack.o kcpp-repackmapper.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o sampling.o kcpputils.o
|
||||
OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants.o kcpp-quantmapper_noavx2.o ggml-repack.o kcpp-repackmapper_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx2.o common.o sampling.o kcpputils.o
|
||||
OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants.o kcpp-quantmapper_noavx1.o ggml-repack.o kcpp-repackmapper_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx1.o common.o sampling.o kcpputils.o
|
||||
OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants.o kcpp-quantmapper_failsafe.o ggml-repack.o kcpp-repackmapper_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_failsafe.o common.o sampling.o kcpputils.o
|
||||
|
||||
# OS specific
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
|
|
@ -507,23 +507,29 @@ ggml-quants_noavx1.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-q
|
|||
$(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
|
||||
ggml-quants_failsafe.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
|
||||
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||
|
||||
#cpu quants
|
||||
ggml-cpu-quants.o: ggml/src/ggml-cpu/quants.c ggml/include/ggml.h ggml/src/ggml-cpu/quants.h ggml/src/ggml-common.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
# The mapper textually #includes one arch/<arch>/quants.c, so the per-arch
# sources are prerequisites as well; otherwise edits to them would not
# trigger a rebuild. $< is still the mapper (first prerequisite).
kcpp-quantmapper.o: ggml/src/ggml-cpu/kcpp-quantmapper.c $(wildcard ggml/src/ggml-cpu/arch/*/quants.c)
	$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
|
||||
ggml-cpu-quants_noavx2.o: ggml/src/ggml-cpu/quants.c ggml/include/ggml.h ggml/src/ggml-cpu/quants.h ggml/src/ggml-common.h
|
||||
# The mapper textually #includes one arch/<arch>/quants.c, so the per-arch
# sources are prerequisites as well; otherwise edits to them would not
# trigger a rebuild. $< is still the mapper (first prerequisite).
kcpp-quantmapper_noavx2.o: ggml/src/ggml-cpu/kcpp-quantmapper.c $(wildcard ggml/src/ggml-cpu/arch/*/quants.c)
	$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
||||
ggml-cpu-quants_noavx1.o: ggml/src/ggml-cpu/quants.c ggml/include/ggml.h ggml/src/ggml-cpu/quants.h ggml/src/ggml-common.h
|
||||
# The mapper textually #includes one arch/<arch>/quants.c, so the per-arch
# sources are prerequisites as well; otherwise edits to them would not
# trigger a rebuild. $< is still the mapper (first prerequisite).
kcpp-quantmapper_noavx1.o: ggml/src/ggml-cpu/kcpp-quantmapper.c $(wildcard ggml/src/ggml-cpu/arch/*/quants.c)
	$(CC) $(CFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
|
||||
ggml-cpu-quants_failsafe.o: ggml/src/ggml-cpu/quants.c ggml/include/ggml.h ggml/src/ggml-cpu/quants.h ggml/src/ggml-common.h
|
||||
# The mapper textually #includes one arch/<arch>/quants.c, so the per-arch
# sources are prerequisites as well; otherwise edits to them would not
# trigger a rebuild. $< is still the mapper (first prerequisite).
kcpp-quantmapper_failsafe.o: ggml/src/ggml-cpu/kcpp-quantmapper.c $(wildcard ggml/src/ggml-cpu/arch/*/quants.c)
	$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||
|
||||
#aarch64 repack
|
||||
ggml-cpu-repack.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
|
||||
ggml-repack.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
# The mapper textually #includes arch/<arch>/repack.cpp where one exists, so
# the per-arch sources are prerequisites as well; otherwise edits to them
# would not trigger a rebuild. $< is still the mapper (first prerequisite).
kcpp-repackmapper.o: ggml/src/ggml-cpu/kcpp-repackmapper.cpp $(wildcard ggml/src/ggml-cpu/arch/*/repack.cpp)
	$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
|
||||
ggml-cpu-repack_noavx2.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
|
||||
# The mapper textually #includes arch/<arch>/repack.cpp where one exists, so
# the per-arch sources are prerequisites as well; otherwise edits to them
# would not trigger a rebuild. $< is still the mapper (first prerequisite).
kcpp-repackmapper_noavx2.o: ggml/src/ggml-cpu/kcpp-repackmapper.cpp $(wildcard ggml/src/ggml-cpu/arch/*/repack.cpp)
	$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
||||
ggml-cpu-repack_noavx1.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
|
||||
# The mapper textually #includes arch/<arch>/repack.cpp where one exists, so
# the per-arch sources are prerequisites as well; otherwise edits to them
# would not trigger a rebuild. $< is still the mapper (first prerequisite).
kcpp-repackmapper_noavx1.o: ggml/src/ggml-cpu/kcpp-repackmapper.cpp $(wildcard ggml/src/ggml-cpu/arch/*/repack.cpp)
	$(CXX) $(CXXFLAGS) $(SIMPLERCFLAGS) -c $< -o $@
|
||||
ggml-cpu-repack_failsafe.o: ggml/src/ggml-cpu/repack.cpp ggml/include/ggml.h ggml/src/ggml-cpu/repack.h
|
||||
# The mapper textually #includes arch/<arch>/repack.cpp where one exists, so
# the per-arch sources are prerequisites as well; otherwise edits to them
# would not trigger a rebuild. $< is still the mapper (first prerequisite).
kcpp-repackmapper_failsafe.o: ggml/src/ggml-cpu/kcpp-repackmapper.cpp $(wildcard ggml/src/ggml-cpu/arch/*/repack.cpp)
	$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||
|
||||
#sgemm
|
||||
|
|
|
|||
24
ggml/src/ggml-cpu/kcpp-quantmapper.c
Normal file
24
ggml/src/ggml-cpu/kcpp-quantmapper.c
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
// KoboldCpp compile-time selector for the CPU quant kernels.
// llama.cpp split the formerly all-in-one CPU quant handling into one
// quants.c per architecture, so this translation unit textually includes
// the file matching the target being compiled.
// Branches are tested top to bottom and the first match wins:
// x86/x64 first, then ARM/AArch64, then every other architecture.

#if defined(__x86_64__) || defined(__amd64__) || defined(__AMD64__) || defined(_M_X64) \
 || defined(__i386__) || defined(_M_IX86)
    #pragma message("KoboldCpp Compiling Quants for x86/x64")
    #include "arch/x86/quants.c"

#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM64__) || defined(_M_ARM64) \
   || defined(__arm__) || defined(_M_ARM)
    #pragma message("KoboldCpp Compiling Quants for ARM")
    #include "arch/arm/quants.c"

#elif defined(__powerpc64__) || defined(__ppc64le__) || defined(__powerpc__) \
   || defined(__ppc__) || defined(__PPC__)
    #pragma message("KoboldCpp Compiling Quants for PowerPC")
    #include "arch/powerpc/quants.c"

#elif defined(__loongarch64) || defined(__loongarch__)
    #pragma message("KoboldCpp Compiling Quants for LoongArch")
    #include "arch/loongarch/quants.c"

// Only RISC-V targets with a 64-bit xlen select this branch.
#elif defined(__riscv) && (__riscv_xlen == 64)
    #pragma message("KoboldCpp Compiling Quants for RISCV")
    #include "arch/riscv/quants.c"

#elif defined(__s390x__)
    #pragma message("KoboldCpp Compiling Quants for S390X")
    #include "arch/s390/quants.c"

#else
    // No known architecture matched: only a build message is emitted and
    // no arch-specific quant source is included.
    #pragma message("KoboldCpp Cannot Compile Quants! Unknown Architecture!")
#endif
|
||||
21
ggml/src/ggml-cpu/kcpp-repackmapper.cpp
Normal file
21
ggml/src/ggml-cpu/kcpp-repackmapper.cpp
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
// KoboldCpp compile-time selector for the CPU repack kernels.
// llama.cpp split the formerly all-in-one CPU handling into per-architecture
// source files, so this translation unit textually includes the repack.cpp
// matching the target being compiled, where this file names one.
// Branches are tested top to bottom and the first match wins:
// x86/x64 first, then ARM/AArch64, then every other architecture.

#if defined(__x86_64__) || defined(__amd64__) || defined(__AMD64__) || defined(_M_X64) \
 || defined(__i386__) || defined(_M_IX86)
    #pragma message("KoboldCpp Compiling Repack for x86/x64")
    #include "arch/x86/repack.cpp"

#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM64__) || defined(_M_ARM64) \
   || defined(__arm__) || defined(_M_ARM)
    #pragma message("KoboldCpp Compiling Repack for ARM")
    #include "arch/arm/repack.cpp"

#elif defined(__powerpc64__) || defined(__ppc64le__) || defined(__powerpc__) \
   || defined(__ppc__) || defined(__PPC__)
    // Message only: this branch includes no arch-specific repack source.
    #pragma message("KoboldCpp Compiling Repack for PowerPC")

#elif defined(__loongarch64) || defined(__loongarch__)
    // Message only: this branch includes no arch-specific repack source.
    #pragma message("KoboldCpp Compiling Repack for LoongArch")

// Only RISC-V targets with a 64-bit xlen select this branch.
#elif defined(__riscv) && (__riscv_xlen == 64)
    #pragma message("KoboldCpp Compiling Repack for RISCV")
    #include "arch/riscv/repack.cpp"

#elif defined(__s390x__)
    // Message only: this branch includes no arch-specific repack source.
    #pragma message("KoboldCpp Compiling Repack for S390X")

#else
    // No known architecture matched: only a build message is emitted and
    // no arch-specific repack source is included.
    #pragma message("KoboldCpp Cannot Compile Repack! Unknown Architecture!")
#endif
|
||||
Loading…
Add table
Add a link
Reference in a new issue