mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-26 10:41:25 +00:00
add support for llguidance (#1728)
* add llguidance remove tab indentation for makefile if statements - these are dangerous fix broken tool compilation commands add USE_LLGUIDANCE env var to enable llguidance for faster structured output generation add llguidance as an optional submodule * rm submodule
This commit is contained in:
parent
f7fa283bb6
commit
27c443f01e
2 changed files with 22 additions and 10 deletions
0
.gitmodules
vendored
Normal file
0
.gitmodules
vendored
Normal file
32
Makefile
32
Makefile
|
|
@ -57,6 +57,7 @@ CXXFLAGS += -fsanitize=undefined -fsanitize-undefined-trap-on-error
|
|||
endif
|
||||
CFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK
|
||||
CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK
|
||||
|
||||
ifndef KCPP_DEBUG
|
||||
CFLAGS += -DNDEBUG -s
|
||||
CXXFLAGS += -DNDEBUG -s
|
||||
|
|
@ -74,6 +75,17 @@ CFLAGS += -pthread -Wno-deprecated -Wno-deprecated-declarations -Wno-unused-va
|
|||
CXXFLAGS += -pthread -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -Wno-unused-variable
|
||||
|
||||
LDFLAGS =
|
||||
|
||||
ifdef USE_LLGUIDANCE
|
||||
CFLAGS += -DLLAMA_USE_LLGUIDANCE
|
||||
CXXFLAGS += -DLLAMA_USE_LLGUIDANCE
|
||||
LDFLAGS += -Lllguidance/target/release -lllguidance
|
||||
OBJS_FULL += llguidance.o
|
||||
llguidance.o: common/llguidance.cpp
|
||||
cd llguidance && cargo build --release
|
||||
$(CXX) $(CXXFLAGS) -Illguidance/target/release -c $< -o $@
|
||||
endif
|
||||
|
||||
FASTCFLAGS = $(subst -O3,-Ofast,$(CFLAGS))
|
||||
FASTCXXFLAGS = $(subst -O3,-Ofast,$(CXXFLAGS))
|
||||
|
||||
|
|
@ -300,10 +312,10 @@ endif # LLAMA_HIPBLAS
|
|||
|
||||
|
||||
ifdef LLAMA_METAL
|
||||
CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG -DSD_USE_METAL
|
||||
CXXFLAGS += -DGGML_USE_METAL -DSD_USE_METAL
|
||||
LDFLAGS += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||
OBJS += ggml-metal.o
|
||||
CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG -DSD_USE_METAL
|
||||
CXXFLAGS += -DGGML_USE_METAL -DSD_USE_METAL
|
||||
LDFLAGS += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||
OBJS += ggml-metal.o
|
||||
|
||||
ggml-metal.o: ggml/src/ggml-metal/ggml-metal.m ggml/src/ggml-metal/ggml-metal-impl.h ggml/include/ggml-metal.h
|
||||
@echo "== Preparing merged Metal file =="
|
||||
|
|
@ -908,17 +920,17 @@ koboldcpp_vulkan_noavx2:
|
|||
endif
|
||||
|
||||
# tools
|
||||
quantize_gguf: tools/quantize/quantize.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL)
|
||||
quantize_gguf: tools/quantize/quantize.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_gptj: otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL)
|
||||
quantize_gptj: otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_gpt2: otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL)
|
||||
quantize_gpt2: otherarch/tools/gpt2_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_neox: otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL)
|
||||
quantize_neox: otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_mpt: otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL)
|
||||
quantize_mpt: otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_clip: tools/mtmd/clip.cpp tools/mtmd/clip.h tools/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL)
|
||||
quantize_clip: tools/mtmd/clip.cpp tools/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
|
||||
#window simple clinfo
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue