Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.flake8
#	.github/labeler.yml
#	.github/workflows/bench.yml.disabled
#	.github/workflows/build-linux-cross.yml
#	.github/workflows/build.yml
#	.github/workflows/server.yml
#	.gitignore
#	CMakeLists.txt
#	CODEOWNERS
#	Makefile
#	README.md
#	SECURITY.md
#	build-xcframework.sh
#	ci/run.sh
#	docs/development/HOWTO-add-model.md
#	docs/multimodal/MobileVLM.md
#	docs/multimodal/glmedge.md
#	docs/multimodal/llava.md
#	docs/multimodal/minicpmo2.6.md
#	docs/multimodal/minicpmv2.5.md
#	docs/multimodal/minicpmv2.6.md
#	examples/CMakeLists.txt
#	examples/pydantic_models_to_grammar_examples.py
#	grammars/README.md
#	pyrightconfig.json
#	requirements/requirements-all.txt
#	scripts/fetch_server_test_models.py
#	scripts/tool_bench.py
#	scripts/xxd.cmake
#	tests/CMakeLists.txt
#	tests/run-json-schema-to-grammar.mjs
#	tools/batched-bench/CMakeLists.txt
#	tools/batched-bench/README.md
#	tools/batched-bench/batched-bench.cpp
#	tools/cvector-generator/CMakeLists.txt
#	tools/cvector-generator/README.md
#	tools/cvector-generator/completions.txt
#	tools/cvector-generator/cvector-generator.cpp
#	tools/cvector-generator/mean.hpp
#	tools/cvector-generator/negative.txt
#	tools/cvector-generator/pca.hpp
#	tools/cvector-generator/positive.txt
#	tools/export-lora/CMakeLists.txt
#	tools/export-lora/README.md
#	tools/export-lora/export-lora.cpp
#	tools/gguf-split/CMakeLists.txt
#	tools/gguf-split/README.md
#	tools/imatrix/CMakeLists.txt
#	tools/imatrix/README.md
#	tools/imatrix/imatrix.cpp
#	tools/llama-bench/CMakeLists.txt
#	tools/llama-bench/README.md
#	tools/llama-bench/llama-bench.cpp
#	tools/llava/CMakeLists.txt
#	tools/llava/README.md
#	tools/llava/android/adb_run.sh
#	tools/llava/android/build_64.sh
#	tools/llava/clip-quantize-cli.cpp
#	tools/main/CMakeLists.txt
#	tools/main/README.md
#	tools/perplexity/CMakeLists.txt
#	tools/perplexity/README.md
#	tools/perplexity/perplexity.cpp
#	tools/quantize/CMakeLists.txt
#	tools/rpc/CMakeLists.txt
#	tools/rpc/README.md
#	tools/rpc/rpc-server.cpp
#	tools/run/CMakeLists.txt
#	tools/run/README.md
#	tools/run/linenoise.cpp/linenoise.cpp
#	tools/run/linenoise.cpp/linenoise.h
#	tools/run/run.cpp
#	tools/server/CMakeLists.txt
#	tools/server/README.md
#	tools/server/bench/README.md
#	tools/server/public_simplechat/readme.md
#	tools/server/tests/README.md
#	tools/server/themes/README.md
#	tools/server/themes/buttons-top/README.md
#	tools/server/themes/wild/README.md
#	tools/tokenize/CMakeLists.txt
#	tools/tokenize/tokenize.cpp
This commit is contained in:
Concedo 2025-05-03 12:15:36 +08:00
commit 5a2808ffaf
137 changed files with 75 additions and 303 deletions

View file

@ -21,15 +21,15 @@ indent_style = tab
[prompts/*.txt] [prompts/*.txt]
insert_final_newline = unset insert_final_newline = unset
[examples/server/public/*] [tools/server/public/*]
indent_size = 2 indent_size = 2
[examples/server/public/deps_*] [tools/server/public/deps_*]
trim_trailing_whitespace = unset trim_trailing_whitespace = unset
indent_style = unset indent_style = unset
indent_size = unset indent_size = unset
[examples/server/deps_*] [tools/server/deps_*]
trim_trailing_whitespace = unset trim_trailing_whitespace = unset
indent_style = unset indent_style = unset
indent_size = unset indent_size = unset
@ -37,7 +37,7 @@ indent_size = unset
[examples/llama.swiftui/llama.swiftui.xcodeproj/*] [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab indent_style = tab
[examples/cvector-generator/*.txt] [tools/cvector-generator/*.txt]
trim_trailing_whitespace = unset trim_trailing_whitespace = unset
insert_final_newline = unset insert_final_newline = unset

2
.gitignore vendored
View file

@ -92,8 +92,6 @@ ppl-*.txt
qnt-*.txt qnt-*.txt
perf-*.txt perf-*.txt
examples/jeopardy/results.txt
poetry.lock poetry.lock
poetry.toml poetry.toml

View file

@ -470,51 +470,51 @@ add_library(common2
common/common.h common/common.h
common/sampling.cpp common/sampling.cpp
common/sampling.h common/sampling.h
examples/llava/llava.cpp tools/llava/llava.cpp
examples/llava/llava.h tools/llava/llava.h
examples/llava/clip.cpp tools/llava/clip.cpp
examples/llava/clip.h tools/llava/clip.h
src/unicode.h src/unicode.h
src/unicode.cpp src/unicode.cpp
src/unicode-data.cpp src/unicode-data.cpp
otherarch/utils.cpp otherarch/utils.cpp
otherarch/utils.h) otherarch/utils.h)
target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
target_compile_features(common2 PUBLIC cxx_std_17) # don't bump target_compile_features(common2 PUBLIC cxx_std_17) # don't bump
target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(sdtype_adapter add_library(sdtype_adapter
otherarch/sdcpp/sdtype_adapter.cpp) otherarch/sdcpp/sdtype_adapter.cpp)
target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
target_compile_features(sdtype_adapter PUBLIC cxx_std_17) # don't bump target_compile_features(sdtype_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(whisper_adapter add_library(whisper_adapter
otherarch/whispercpp/whisper_adapter.cpp) otherarch/whispercpp/whisper_adapter.cpp)
target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common) target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./tools ./common)
target_compile_features(whisper_adapter PUBLIC cxx_std_17) # don't bump target_compile_features(whisper_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(tts_adapter add_library(tts_adapter
otherarch/tts_adapter.cpp) otherarch/tts_adapter.cpp)
target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./examples ./common) target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./tools ./common)
target_compile_features(tts_adapter PUBLIC cxx_std_17) # don't bump target_compile_features(tts_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(tts_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(tts_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(tts_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(tts_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(embeddings_adapter add_library(embeddings_adapter
otherarch/embeddings_adapter.cpp) otherarch/embeddings_adapter.cpp)
target_include_directories(embeddings_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./examples ./common) target_include_directories(embeddings_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./tools ./common)
target_compile_features(embeddings_adapter PUBLIC cxx_std_17) # don't bump target_compile_features(embeddings_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(embeddings_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS}) target_link_libraries(embeddings_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
set_target_properties(embeddings_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(embeddings_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(gpttype_adapter add_library(gpttype_adapter
gpttype_adapter.cpp) gpttype_adapter.cpp)
target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
target_compile_features(gpttype_adapter PUBLIC cxx_std_17) # don't bump target_compile_features(gpttype_adapter PUBLIC cxx_std_17) # don't bump
target_link_libraries(gpttype_adapter PRIVATE common2 ggml ggml_v1 ggml_v2 ggml_v3 ${LLAMA_EXTRA_LIBS}) target_link_libraries(gpttype_adapter PRIVATE common2 ggml ggml_v1 ggml_v2 ggml_v3 ${LLAMA_EXTRA_LIBS})
set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
@ -522,7 +522,7 @@ set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
if (LLAMA_CUBLAS) if (LLAMA_CUBLAS)
set(TARGET koboldcpp_cublas) set(TARGET koboldcpp_cublas)
add_library(${TARGET} SHARED expose.cpp expose.h) add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "") set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas") set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
@ -542,7 +542,7 @@ endif()
if (LLAMA_HIPBLAS) if (LLAMA_HIPBLAS)
set(TARGET koboldcpp_hipblas) set(TARGET koboldcpp_hipblas)
add_library(${TARGET} SHARED expose.cpp expose.h) add_library(${TARGET} SHARED expose.cpp expose.h)
target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common) target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
set_target_properties(${TARGET} PROPERTIES PREFIX "") set_target_properties(${TARGET} PROPERTIES PREFIX "")
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas") set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")

View file

@ -511,7 +511,7 @@ sgemm_failsafe.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamaf
#there's no intrinsics or special gpu ops used here, so we can have a universal object #there's no intrinsics or special gpu ops used here, so we can have a universal object
ggml-alloc.o: ggml/src/ggml-alloc.c ggml/include/ggml.h ggml/include/ggml-alloc.h ggml-alloc.o: ggml/src/ggml-alloc.c ggml/include/ggml.h ggml/include/ggml-alloc.h
$(CC) $(CFLAGS) -c $< -o $@ $(CC) $(CFLAGS) -c $< -o $@
llava.o: examples/llava/llava.cpp examples/llava/llava.h llava.o: tools/llava/llava.cpp tools/llava/llava.h
$(CXX) $(CXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) -c $< -o $@
unicode.o: src/unicode.cpp src/unicode.h unicode.o: src/unicode.cpp src/unicode.h
$(CXX) $(CXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) -c $< -o $@
@ -541,11 +541,11 @@ ggml-backend-reg_vulkan.o: ggml/src/ggml-backend-reg.cpp ggml/src/ggml-backend-i
$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
ggml-backend-reg_cublas.o: ggml/src/ggml-backend-reg.cpp ggml/src/ggml-backend-impl.h ggml/include/ggml.h ggml/include/ggml-backend.h ggml/include/ggml-cpu.h ggml-backend-reg_cublas.o: ggml/src/ggml-backend-reg.cpp ggml/src/ggml-backend-impl.h ggml/include/ggml.h ggml/include/ggml-backend.h ggml/include/ggml-cpu.h
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
llavaclip_default.o: examples/llava/clip.cpp examples/llava/clip.h llavaclip_default.o: tools/llava/clip.cpp tools/llava/clip.h
$(CXX) $(CXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) -c $< -o $@
llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h llavaclip_cublas.o: tools/llava/clip.cpp tools/llava/clip.h
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
llavaclip_vulkan.o: examples/llava/clip.cpp examples/llava/clip.h llavaclip_vulkan.o: tools/llava/clip.cpp tools/llava/clip.h
$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
#this is only used for accelerate #this is only used for accelerate
@ -663,17 +663,17 @@ clean:
rm -vrf ggml/src/ggml-cuda/template-instances/*.o rm -vrf ggml/src/ggml-cuda/template-instances/*.o
# useful tools # useful tools
main: examples/main/main.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS) main: tools/main/main.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS) sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
whispermain: otherarch/whispercpp/main.cpp otherarch/whispercpp/whisper.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS) whispermain: otherarch/whispercpp/main.cpp otherarch/whispercpp/whisper.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
ttsmain: examples/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS) ttsmain: tools/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
gguf-split: examples/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS) gguf-split: tools/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
mtmd-cli: examples/llava/mtmd-cli.cpp examples/llava/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS) mtmd-cli: tools/llava/mtmd-cli.cpp tools/llava/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
ggml/src/ggml-vulkan-shaders.cpp: ggml/src/ggml-vulkan-shaders.cpp:
@ -817,7 +817,7 @@ koboldcpp_vulkan_noavx2:
endif endif
# tools # tools
quantize_gguf: examples/quantize/quantize.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) quantize_gguf: tools/quantize/quantize.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_gptj: otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) quantize_gptj: otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
@ -827,7 +827,7 @@ quantize_neox: otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_mpt: otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) quantize_mpt: otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
quantize_clip: examples/llava/clip.cpp examples/llava/clip.h examples/llava/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) quantize_clip: tools/llava/clip.cpp tools/llava/clip.h tools/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
#window simple clinfo #window simple clinfo

View file

@ -2212,14 +2212,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING")); ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
add_opt(common_arg( add_opt(common_arg(
{"--mmproj"}, "FILE", {"--mmproj"}, "FILE",
"path to a multimodal projector file. see examples/llava/README.md", "path to a multimodal projector file. see tools/llava/README.md",
[](common_params & params, const std::string & value) { [](common_params & params, const std::string & value) {
params.mmproj.path = value; params.mmproj.path = value;
} }
).set_examples(mmproj_examples)); ).set_examples(mmproj_examples));
add_opt(common_arg( add_opt(common_arg(
{"--mmproj-url"}, "URL", {"--mmproj-url"}, "URL",
"URL to a multimodal projector file. see examples/llava/README.md", "URL to a multimodal projector file. see tools/llava/README.md",
[](common_params & params, const std::string & value) { [](common_params & params, const std::string & value) {
params.mmproj.url = value; params.mmproj.url = value;
} }

View file

@ -336,7 +336,7 @@ struct common_params {
common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO; common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
// multimodal models (see examples/llava) // multimodal models (see tools/llava)
struct common_params_model mmproj; struct common_params_model mmproj;
bool mmproj_use_gpu = true; // use GPU for multimodal model bool mmproj_use_gpu = true; // use GPU for multimodal model
bool no_mmproj = false; // explicitly disable multimodal model bool no_mmproj = false; // explicitly disable multimodal model
@ -410,8 +410,8 @@ struct common_params {
int n_pca_batch = 100; int n_pca_batch = 100;
int n_pca_iterations = 1000; int n_pca_iterations = 1000;
dimre_method cvector_dimre_method = DIMRE_METHOD_PCA; dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; std::string cvector_positive_file = "tools/cvector-generator/positive.txt";
std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; std::string cvector_negative_file = "tools/cvector-generator/negative.txt";
bool spm_infill = false; // suffix/prefix/middle pattern for infill bool spm_infill = false; // suffix/prefix/middle pattern for infill

View file

@ -1,265 +0,0 @@
#include "ggml.h"
#include "gguf.h"
#include <cstdio>
#include <string>
#include <sstream>
#include <vector>
#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
template <typename T>
static std::string to_string(const T & val) {
std::stringstream ss;
ss << val;
return ss.str();
}
static bool gguf_ex_write(const std::string & fname) {
struct gguf_context * ctx = gguf_init_empty();
gguf_set_val_u8 (ctx, "some.parameter.uint8", 0x12);
gguf_set_val_i8 (ctx, "some.parameter.int8", -0x13);
gguf_set_val_u16 (ctx, "some.parameter.uint16", 0x1234);
gguf_set_val_i16 (ctx, "some.parameter.int16", -0x1235);
gguf_set_val_u32 (ctx, "some.parameter.uint32", 0x12345678);
gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679);
gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f);
gguf_set_val_u64 (ctx, "some.parameter.uint64", 0x123456789abcdef0ull);
gguf_set_val_i64 (ctx, "some.parameter.int64", -0x123456789abcdef1ll);
gguf_set_val_f64 (ctx, "some.parameter.float64", 0.1234567890123456789);
gguf_set_val_bool(ctx, "some.parameter.bool", true);
gguf_set_val_str (ctx, "some.parameter.string", "hello world");
gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
struct ggml_init_params params = {
/*.mem_size =*/ 128ull*1024ull*1024ull,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
};
struct ggml_context * ctx_data = ggml_init(params);
const int n_tensors = 10;
// tensor infos
for (int i = 0; i < n_tensors; ++i) {
const std::string name = "tensor_" + to_string(i);
int64_t ne[GGML_MAX_DIMS] = { 1 };
int32_t n_dims = rand() % GGML_MAX_DIMS + 1;
for (int j = 0; j < n_dims; ++j) {
ne[j] = rand() % 10 + 1;
}
struct ggml_tensor * cur = ggml_new_tensor(ctx_data, GGML_TYPE_F32, n_dims, ne);
ggml_set_name(cur, name.c_str());
{
float * data = (float *) cur->data;
for (int j = 0; j < ggml_nelements(cur); ++j) {
data[j] = 100 + i;
}
}
gguf_add_tensor(ctx, cur);
}
gguf_write_to_file(ctx, fname.c_str(), false);
printf("%s: wrote file '%s;\n", __func__, fname.c_str());
ggml_free(ctx_data);
gguf_free(ctx);
return true;
}
// just read tensor info
static bool gguf_ex_read_0(const std::string & fname) {
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ NULL,
};
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
if (!ctx) {
fprintf(stderr, "%s: failed to load '%s'\n", __func__, fname.c_str());
return false;
}
printf("%s: version: %d\n", __func__, gguf_get_version(ctx));
printf("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
printf("%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
// kv
{
const int n_kv = gguf_get_n_kv(ctx);
printf("%s: n_kv: %d\n", __func__, n_kv);
for (int i = 0; i < n_kv; ++i) {
const char * key = gguf_get_key(ctx, i);
printf("%s: kv[%d]: key = %s\n", __func__, i, key);
}
}
// find kv string
{
const char * findkey = "some.parameter.string";
const int keyidx = gguf_find_key(ctx, findkey);
if (keyidx == -1) {
printf("%s: find key: %s not found.\n", __func__, findkey);
} else {
const char * key_value = gguf_get_val_str(ctx, keyidx);
printf("%s: find key: %s found, kv[%d] value = %s\n", __func__, findkey, keyidx, key_value);
}
}
// tensor info
{
const int n_tensors = gguf_get_n_tensors(ctx);
printf("%s: n_tensors: %d\n", __func__, n_tensors);
for (int i = 0; i < n_tensors; ++i) {
const char * name = gguf_get_tensor_name (ctx, i);
const size_t size = gguf_get_tensor_size (ctx, i);
const size_t offset = gguf_get_tensor_offset(ctx, i);
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
}
}
gguf_free(ctx);
return true;
}
// read and create ggml_context containing the tensors and their data
static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
struct ggml_context * ctx_data = NULL;
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ &ctx_data,
};
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
printf("%s: version: %d\n", __func__, gguf_get_version(ctx));
printf("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
printf("%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
// kv
{
const int n_kv = gguf_get_n_kv(ctx);
printf("%s: n_kv: %d\n", __func__, n_kv);
for (int i = 0; i < n_kv; ++i) {
const char * key = gguf_get_key(ctx, i);
printf("%s: kv[%d]: key = %s\n", __func__, i, key);
}
}
// tensor info
{
const int n_tensors = gguf_get_n_tensors(ctx);
printf("%s: n_tensors: %d\n", __func__, n_tensors);
for (int i = 0; i < n_tensors; ++i) {
const char * name = gguf_get_tensor_name (ctx, i);
const size_t size = gguf_get_tensor_size (ctx, i);
const size_t offset = gguf_get_tensor_offset(ctx, i);
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
}
}
// data
{
const int n_tensors = gguf_get_n_tensors(ctx);
for (int i = 0; i < n_tensors; ++i) {
printf("%s: reading tensor %d data\n", __func__, i);
const char * name = gguf_get_tensor_name(ctx, i);
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
__func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
// print first 10 elements
const float * data = (const float *) cur->data;
printf("%s data[:10] : ", name);
for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
printf("%f ", data[j]);
}
printf("\n\n");
// check data
if (check_data) {
const float * data = (const float *) cur->data;
for (int j = 0; j < ggml_nelements(cur); ++j) {
if (data[j] != 100 + i) {
fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
gguf_free(ctx);
return false;
}
}
}
}
}
printf("%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
ggml_free(ctx_data);
gguf_free(ctx);
return true;
}
int main(int argc, char ** argv) {
if (argc < 3) {
printf("usage: %s data.gguf r|w [n]\n", argv[0]);
printf("r: read data.gguf file\n");
printf("w: write data.gguf file\n");
printf("n: no check of tensor data\n");
return -1;
}
bool check_data = true;
if (argc == 4) {
check_data = false;
}
srand(123456);
const std::string fname(argv[1]);
const std::string mode (argv[2]);
GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");
if (mode == "w") {
GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
} else if (mode == "r") {
GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
GGML_ASSERT(gguf_ex_read_1(fname, check_data) && "failed to read gguf file");
}
return 0;
}

Binary file not shown.

View file

@ -40,8 +40,8 @@
#include "neox_v2.cpp" #include "neox_v2.cpp"
#include "neox_v3.cpp" #include "neox_v3.cpp"
#include "mpt_v3.cpp" #include "mpt_v3.cpp"
#include "examples/llava/clip.h" #include "tools/llava/clip.h"
#include "examples/llava/llava.h" #include "tools/llava/llava.h"
#include "common/common.h" #include "common/common.h"
//const //const

39
tools/CMakeLists.txt Normal file
View file

@ -0,0 +1,39 @@
# dependencies
find_package(Threads REQUIRED)
# third-party
# ...
# flags
llama_add_compile_flags()
# tools
if (EMSCRIPTEN)
else()
add_subdirectory(batched-bench)
add_subdirectory(gguf-split)
add_subdirectory(imatrix)
add_subdirectory(llama-bench)
add_subdirectory(main)
add_subdirectory(perplexity)
add_subdirectory(quantize)
if (LLAMA_BUILD_SERVER)
add_subdirectory(server)
endif()
add_subdirectory(run)
add_subdirectory(tokenize)
add_subdirectory(tts)
if (NOT GGML_BACKEND_DL)
# these examples use the backends directly and cannot be built with dynamic loading
add_subdirectory(cvector-generator)
add_subdirectory(export-lora)
add_subdirectory(llava)
if (GGML_RPC)
add_subdirectory(rpc)
endif()
endif()
endif()

View file

Before

Width:  |  Height:  |  Size: 121 KiB

After

Width:  |  Height:  |  Size: 121 KiB

Before After
Before After

Binary file not shown.

View file

Before

Width:  |  Height:  |  Size: 4 KiB

After

Width:  |  Height:  |  Size: 4 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 21 KiB

After

Width:  |  Height:  |  Size: 21 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 117 KiB

After

Width:  |  Height:  |  Size: 117 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 4 KiB

After

Width:  |  Height:  |  Size: 4 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 4 KiB

After

Width:  |  Height:  |  Size: 4 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 75 KiB

After

Width:  |  Height:  |  Size: 75 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 254 KiB

After

Width:  |  Height:  |  Size: 254 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 485 KiB

After

Width:  |  Height:  |  Size: 485 KiB

Before After
Before After

Some files were not shown because too many files have changed in this diff Show more