Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.flake8
#	.github/labeler.yml
#	.github/workflows/bench.yml.disabled
#	.github/workflows/build-linux-cross.yml
#	.github/workflows/build.yml
#	.github/workflows/server.yml
#	.gitignore
#	CMakeLists.txt
#	CODEOWNERS
#	Makefile
#	README.md
#	SECURITY.md
#	build-xcframework.sh
#	ci/run.sh
#	docs/development/HOWTO-add-model.md
#	docs/multimodal/MobileVLM.md
#	docs/multimodal/glmedge.md
#	docs/multimodal/llava.md
#	docs/multimodal/minicpmo2.6.md
#	docs/multimodal/minicpmv2.5.md
#	docs/multimodal/minicpmv2.6.md
#	examples/CMakeLists.txt
#	examples/pydantic_models_to_grammar_examples.py
#	grammars/README.md
#	pyrightconfig.json
#	requirements/requirements-all.txt
#	scripts/fetch_server_test_models.py
#	scripts/tool_bench.py
#	scripts/xxd.cmake
#	tests/CMakeLists.txt
#	tests/run-json-schema-to-grammar.mjs
#	tools/batched-bench/CMakeLists.txt
#	tools/batched-bench/README.md
#	tools/batched-bench/batched-bench.cpp
#	tools/cvector-generator/CMakeLists.txt
#	tools/cvector-generator/README.md
#	tools/cvector-generator/completions.txt
#	tools/cvector-generator/cvector-generator.cpp
#	tools/cvector-generator/mean.hpp
#	tools/cvector-generator/negative.txt
#	tools/cvector-generator/pca.hpp
#	tools/cvector-generator/positive.txt
#	tools/export-lora/CMakeLists.txt
#	tools/export-lora/README.md
#	tools/export-lora/export-lora.cpp
#	tools/gguf-split/CMakeLists.txt
#	tools/gguf-split/README.md
#	tools/imatrix/CMakeLists.txt
#	tools/imatrix/README.md
#	tools/imatrix/imatrix.cpp
#	tools/llama-bench/CMakeLists.txt
#	tools/llama-bench/README.md
#	tools/llama-bench/llama-bench.cpp
#	tools/llava/CMakeLists.txt
#	tools/llava/README.md
#	tools/llava/android/adb_run.sh
#	tools/llava/android/build_64.sh
#	tools/llava/clip-quantize-cli.cpp
#	tools/main/CMakeLists.txt
#	tools/main/README.md
#	tools/perplexity/CMakeLists.txt
#	tools/perplexity/README.md
#	tools/perplexity/perplexity.cpp
#	tools/quantize/CMakeLists.txt
#	tools/rpc/CMakeLists.txt
#	tools/rpc/README.md
#	tools/rpc/rpc-server.cpp
#	tools/run/CMakeLists.txt
#	tools/run/README.md
#	tools/run/linenoise.cpp/linenoise.cpp
#	tools/run/linenoise.cpp/linenoise.h
#	tools/run/run.cpp
#	tools/server/CMakeLists.txt
#	tools/server/README.md
#	tools/server/bench/README.md
#	tools/server/public_simplechat/readme.md
#	tools/server/tests/README.md
#	tools/server/themes/README.md
#	tools/server/themes/buttons-top/README.md
#	tools/server/themes/wild/README.md
#	tools/tokenize/CMakeLists.txt
#	tools/tokenize/tokenize.cpp
@@ -21,15 +21,15 @@ indent_style = tab
 [prompts/*.txt]
 insert_final_newline = unset
 
-[examples/server/public/*]
+[tools/server/public/*]
 indent_size = 2
 
-[examples/server/public/deps_*]
+[tools/server/public/deps_*]
 trim_trailing_whitespace = unset
 indent_style = unset
 indent_size = unset
 
-[examples/server/deps_*]
+[tools/server/deps_*]
 trim_trailing_whitespace = unset
 indent_style = unset
 indent_size = unset
@@ -37,7 +37,7 @@ indent_size = unset
 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
 
-[examples/cvector-generator/*.txt]
+[tools/cvector-generator/*.txt]
 trim_trailing_whitespace = unset
 insert_final_newline = unset
 
.gitignore (vendored, 2 changes)

@@ -92,8 +92,6 @@ ppl-*.txt
 qnt-*.txt
 perf-*.txt
 
-examples/jeopardy/results.txt
-
 poetry.lock
 poetry.toml
 
@@ -470,51 +470,51 @@ add_library(common2
 common/common.h
 common/sampling.cpp
 common/sampling.h
-examples/llava/llava.cpp
-examples/llava/llava.h
-examples/llava/clip.cpp
-examples/llava/clip.h
+tools/llava/llava.cpp
+tools/llava/llava.h
+tools/llava/clip.cpp
+tools/llava/clip.h
 src/unicode.h
 src/unicode.cpp
 src/unicode-data.cpp
 otherarch/utils.cpp
 otherarch/utils.h)
-target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
 target_compile_features(common2 PUBLIC cxx_std_17) # don't bump
 target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
 add_library(sdtype_adapter
 otherarch/sdcpp/sdtype_adapter.cpp)
-target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(sdtype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
 target_compile_features(sdtype_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
 add_library(whisper_adapter
 otherarch/whispercpp/whisper_adapter.cpp)
-target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
+target_include_directories(whisper_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./tools ./common)
 target_compile_features(whisper_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
 add_library(tts_adapter
 otherarch/tts_adapter.cpp)
-target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./examples ./common)
+target_include_directories(tts_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./tools ./common)
 target_compile_features(tts_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(tts_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(tts_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
 add_library(embeddings_adapter
 otherarch/embeddings_adapter.cpp)
-target_include_directories(embeddings_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./examples ./common)
+target_include_directories(embeddings_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./tools ./common)
 target_compile_features(embeddings_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(embeddings_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(embeddings_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
 add_library(gpttype_adapter
 gpttype_adapter.cpp)
-target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(gpttype_adapter PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
 target_compile_features(gpttype_adapter PUBLIC cxx_std_17) # don't bump
 target_link_libraries(gpttype_adapter PRIVATE common2 ggml ggml_v1 ggml_v2 ggml_v3 ${LLAMA_EXTRA_LIBS})
 set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -522,7 +522,7 @@ set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 if (LLAMA_CUBLAS)
 set(TARGET koboldcpp_cublas)
 add_library(${TARGET} SHARED expose.cpp expose.h)
-target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
 target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
 set_target_properties(${TARGET} PROPERTIES PREFIX "")
 set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
@@ -542,7 +542,7 @@ endif()
 if (LLAMA_HIPBLAS)
 set(TARGET koboldcpp_hipblas)
 add_library(${TARGET} SHARED expose.cpp expose.h)
-target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(${TARGET} PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
 target_compile_features(${TARGET} PUBLIC cxx_std_17) # don't bump
 set_target_properties(${TARGET} PROPERTIES PREFIX "")
 set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
Makefile (20 changes)

@@ -511,7 +511,7 @@ sgemm_failsafe.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamaf
 #there's no intrinsics or special gpu ops used here, so we can have a universal object
 ggml-alloc.o: ggml/src/ggml-alloc.c ggml/include/ggml.h ggml/include/ggml-alloc.h
 	$(CC) $(CFLAGS) -c $< -o $@
-llava.o: examples/llava/llava.cpp examples/llava/llava.h
+llava.o: tools/llava/llava.cpp tools/llava/llava.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 unicode.o: src/unicode.cpp src/unicode.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
@@ -541,11 +541,11 @@ ggml-backend-reg_vulkan.o: ggml/src/ggml-backend-reg.cpp ggml/src/ggml-backend-i
 	$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
 ggml-backend-reg_cublas.o: ggml/src/ggml-backend-reg.cpp ggml/src/ggml-backend-impl.h ggml/include/ggml.h ggml/include/ggml-backend.h ggml/include/ggml-cpu.h
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
-llavaclip_default.o: examples/llava/clip.cpp examples/llava/clip.h
+llavaclip_default.o: tools/llava/clip.cpp tools/llava/clip.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h
+llavaclip_cublas.o: tools/llava/clip.cpp tools/llava/clip.h
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
-llavaclip_vulkan.o: examples/llava/clip.cpp examples/llava/clip.h
+llavaclip_vulkan.o: tools/llava/clip.cpp tools/llava/clip.h
 	$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
 
 #this is only used for accelerate
@@ -663,17 +663,17 @@ clean:
 	rm -vrf ggml/src/ggml-cuda/template-instances/*.o
 
 # useful tools
-main: examples/main/main.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
+main: tools/main/main.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable-diffusion.cpp otherarch/sdcpp/upscaler.cpp otherarch/sdcpp/model.cpp otherarch/sdcpp/thirdparty/zip.c build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 whispermain: otherarch/whispercpp/main.cpp otherarch/whispercpp/whisper.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-ttsmain: examples/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
+ttsmain: tools/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
+gguf-split: tools/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-mtmd-cli: examples/llava/mtmd-cli.cpp examples/llava/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
+mtmd-cli: tools/llava/mtmd-cli.cpp tools/llava/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 ggml/src/ggml-vulkan-shaders.cpp:
@@ -817,7 +817,7 @@ koboldcpp_vulkan_noavx2:
 endif
 
 # tools
-quantize_gguf: examples/quantize/quantize.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
+quantize_gguf: tools/quantize/quantize.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 quantize_gptj: otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
@@ -827,7 +827,7 @@ quantize_neox: otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 quantize_mpt: otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
-quantize_clip: examples/llava/clip.cpp examples/llava/clip.h examples/llava/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
+quantize_clip: tools/llava/clip.cpp tools/llava/clip.h tools/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
 #window simple clinfo
@@ -2212,14 +2212,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
     add_opt(common_arg(
         {"--mmproj"}, "FILE",
-        "path to a multimodal projector file. see examples/llava/README.md",
+        "path to a multimodal projector file. see tools/llava/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.path = value;
         }
     ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--mmproj-url"}, "URL",
-        "URL to a multimodal projector file. see examples/llava/README.md",
+        "URL to a multimodal projector file. see tools/llava/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.url = value;
         }
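The two option blocks above, besides the help-text path change, show the registration pattern used throughout common_params_parser_init: each flag is one add_opt(common_arg(...)) entry carrying its names, value placeholder, help string, and a setter lambda. Below is a minimal sketch of wiring up one more option the same way; the flag name "--mmproj-log" and the params.mmproj_log field are invented for illustration and are not part of this merge or of the upstream code.

// Hedged sketch only: "--mmproj-log" and params.mmproj_log are hypothetical,
// shown purely to illustrate the common_arg registration pattern above.
add_opt(common_arg(
    {"--mmproj-log"}, "FILE",
    "where to write multimodal projector debug output (hypothetical flag)",
    [](common_params & params, const std::string & value) {
        params.mmproj_log = value; // assumed std::string member, not a real field
    }
).set_examples(mmproj_examples));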
@@ -336,7 +336,7 @@ struct common_params {
 
     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
 
-    // multimodal models (see examples/llava)
+    // multimodal models (see tools/llava)
     struct common_params_model mmproj;
     bool mmproj_use_gpu = true; // use GPU for multimodal model
     bool no_mmproj = false; // explicitly disable multimodal model
@@ -410,8 +410,8 @@ struct common_params {
     int n_pca_batch = 100;
     int n_pca_iterations = 1000;
     dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
-    std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
-    std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
+    std::string cvector_positive_file = "tools/cvector-generator/positive.txt";
+    std::string cvector_negative_file = "tools/cvector-generator/negative.txt";
 
     bool spm_infill = false; // suffix/prefix/middle pattern for infill
 
@@ -1,265 +0,0 @@
-#include "ggml.h"
-#include "gguf.h"
-
-#include <cstdio>
-#include <string>
-#include <sstream>
-#include <vector>
-
-#undef MIN
-#undef MAX
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
-template <typename T>
-static std::string to_string(const T & val) {
-    std::stringstream ss;
-    ss << val;
-    return ss.str();
-}
-
-static bool gguf_ex_write(const std::string & fname) {
-    struct gguf_context * ctx = gguf_init_empty();
-
-    gguf_set_val_u8 (ctx, "some.parameter.uint8", 0x12);
-    gguf_set_val_i8 (ctx, "some.parameter.int8", -0x13);
-    gguf_set_val_u16 (ctx, "some.parameter.uint16", 0x1234);
-    gguf_set_val_i16 (ctx, "some.parameter.int16", -0x1235);
-    gguf_set_val_u32 (ctx, "some.parameter.uint32", 0x12345678);
-    gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679);
-    gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f);
-    gguf_set_val_u64 (ctx, "some.parameter.uint64", 0x123456789abcdef0ull);
-    gguf_set_val_i64 (ctx, "some.parameter.int64", -0x123456789abcdef1ll);
-    gguf_set_val_f64 (ctx, "some.parameter.float64", 0.1234567890123456789);
-    gguf_set_val_bool(ctx, "some.parameter.bool", true);
-    gguf_set_val_str (ctx, "some.parameter.string", "hello world");
-
-    gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
-    gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
-    gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
-
-    struct ggml_init_params params = {
-        /*.mem_size   =*/ 128ull*1024ull*1024ull,
-        /*.mem_buffer =*/ NULL,
-        /*.no_alloc   =*/ false,
-    };
-
-    struct ggml_context * ctx_data = ggml_init(params);
-
-    const int n_tensors = 10;
-
-    // tensor infos
-    for (int i = 0; i < n_tensors; ++i) {
-        const std::string name = "tensor_" + to_string(i);
-
-        int64_t ne[GGML_MAX_DIMS] = { 1 };
-        int32_t n_dims = rand() % GGML_MAX_DIMS + 1;
-
-        for (int j = 0; j < n_dims; ++j) {
-            ne[j] = rand() % 10 + 1;
-        }
-
-        struct ggml_tensor * cur = ggml_new_tensor(ctx_data, GGML_TYPE_F32, n_dims, ne);
-        ggml_set_name(cur, name.c_str());
-
-        {
-            float * data = (float *) cur->data;
-            for (int j = 0; j < ggml_nelements(cur); ++j) {
-                data[j] = 100 + i;
-            }
-        }
-
-        gguf_add_tensor(ctx, cur);
-    }
-
-    gguf_write_to_file(ctx, fname.c_str(), false);
-
-    printf("%s: wrote file '%s;\n", __func__, fname.c_str());
-
-    ggml_free(ctx_data);
-    gguf_free(ctx);
-
-    return true;
-}
-
-// just read tensor info
-static bool gguf_ex_read_0(const std::string & fname) {
-    struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ NULL,
-    };
-
-    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
-
-    if (!ctx) {
-        fprintf(stderr, "%s: failed to load '%s'\n", __func__, fname.c_str());
-        return false;
-    }
-
-    printf("%s: version: %d\n", __func__, gguf_get_version(ctx));
-    printf("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
-    printf("%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
-
-    // kv
-    {
-        const int n_kv = gguf_get_n_kv(ctx);
-
-        printf("%s: n_kv: %d\n", __func__, n_kv);
-
-        for (int i = 0; i < n_kv; ++i) {
-            const char * key = gguf_get_key(ctx, i);
-
-            printf("%s: kv[%d]: key = %s\n", __func__, i, key);
-        }
-    }
-
-    // find kv string
-    {
-        const char * findkey = "some.parameter.string";
-
-        const int keyidx = gguf_find_key(ctx, findkey);
-        if (keyidx == -1) {
-            printf("%s: find key: %s not found.\n", __func__, findkey);
-        } else {
-            const char * key_value = gguf_get_val_str(ctx, keyidx);
-            printf("%s: find key: %s found, kv[%d] value = %s\n", __func__, findkey, keyidx, key_value);
-        }
-    }
-
-    // tensor info
-    {
-        const int n_tensors = gguf_get_n_tensors(ctx);
-
-        printf("%s: n_tensors: %d\n", __func__, n_tensors);
-
-        for (int i = 0; i < n_tensors; ++i) {
-            const char * name = gguf_get_tensor_name (ctx, i);
-            const size_t size = gguf_get_tensor_size (ctx, i);
-            const size_t offset = gguf_get_tensor_offset(ctx, i);
-
-            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
-        }
-    }
-
-    gguf_free(ctx);
-
-    return true;
-}
-
-// read and create ggml_context containing the tensors and their data
-static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
-    struct ggml_context * ctx_data = NULL;
-
-    struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
-    };
-
-    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
-
-    printf("%s: version: %d\n", __func__, gguf_get_version(ctx));
-    printf("%s: alignment: %zu\n", __func__, gguf_get_alignment(ctx));
-    printf("%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx));
-
-    // kv
-    {
-        const int n_kv = gguf_get_n_kv(ctx);
-
-        printf("%s: n_kv: %d\n", __func__, n_kv);
-
-        for (int i = 0; i < n_kv; ++i) {
-            const char * key = gguf_get_key(ctx, i);
-
-            printf("%s: kv[%d]: key = %s\n", __func__, i, key);
-        }
-    }
-
-    // tensor info
-    {
-        const int n_tensors = gguf_get_n_tensors(ctx);
-
-        printf("%s: n_tensors: %d\n", __func__, n_tensors);
-
-        for (int i = 0; i < n_tensors; ++i) {
-            const char * name = gguf_get_tensor_name (ctx, i);
-            const size_t size = gguf_get_tensor_size (ctx, i);
-            const size_t offset = gguf_get_tensor_offset(ctx, i);
-
-            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
-        }
-    }
-
-    // data
-    {
-        const int n_tensors = gguf_get_n_tensors(ctx);
-
-        for (int i = 0; i < n_tensors; ++i) {
-            printf("%s: reading tensor %d data\n", __func__, i);
-
-            const char * name = gguf_get_tensor_name(ctx, i);
-
-            struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
-
-            printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
-                __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
-
-            // print first 10 elements
-            const float * data = (const float *) cur->data;
-
-            printf("%s data[:10] : ", name);
-            for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
-                printf("%f ", data[j]);
-            }
-            printf("\n\n");
-
-            // check data
-            if (check_data) {
-                const float * data = (const float *) cur->data;
-                for (int j = 0; j < ggml_nelements(cur); ++j) {
-                    if (data[j] != 100 + i) {
-                        fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
-                        gguf_free(ctx);
-                        return false;
-                    }
-                }
-            }
-        }
-    }
-
-    printf("%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
-
-    ggml_free(ctx_data);
-    gguf_free(ctx);
-
-    return true;
-}
-
-int main(int argc, char ** argv) {
-    if (argc < 3) {
-        printf("usage: %s data.gguf r|w [n]\n", argv[0]);
-        printf("r: read data.gguf file\n");
-        printf("w: write data.gguf file\n");
-        printf("n: no check of tensor data\n");
-        return -1;
-    }
-    bool check_data = true;
-    if (argc == 4) {
-        check_data = false;
-    }
-
-    srand(123456);
-
-    const std::string fname(argv[1]);
-    const std::string mode (argv[2]);
-
-    GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w");
-
-    if (mode == "w") {
-        GGML_ASSERT(gguf_ex_write(fname) && "failed to write gguf file");
-    } else if (mode == "r") {
-        GGML_ASSERT(gguf_ex_read_0(fname) && "failed to read gguf file");
-        GGML_ASSERT(gguf_ex_read_1(fname, check_data) && "failed to read gguf file");
-    }
-
-    return 0;
-}
@@ -40,8 +40,8 @@
 #include "neox_v2.cpp"
 #include "neox_v3.cpp"
 #include "mpt_v3.cpp"
-#include "examples/llava/clip.h"
-#include "examples/llava/llava.h"
+#include "tools/llava/clip.h"
+#include "tools/llava/llava.h"
 #include "common/common.h"
 
 //const
tools/CMakeLists.txt (new file, 39 lines)

@@ -0,0 +1,39 @@
+# dependencies
+
+find_package(Threads REQUIRED)
+
+# third-party
+
+# ...
+
+# flags
+
+llama_add_compile_flags()
+
+# tools
+
+if (EMSCRIPTEN)
+else()
+    add_subdirectory(batched-bench)
+    add_subdirectory(gguf-split)
+    add_subdirectory(imatrix)
+    add_subdirectory(llama-bench)
+    add_subdirectory(main)
+    add_subdirectory(perplexity)
+    add_subdirectory(quantize)
+    if (LLAMA_BUILD_SERVER)
+        add_subdirectory(server)
+    endif()
+    add_subdirectory(run)
+    add_subdirectory(tokenize)
+    add_subdirectory(tts)
+    if (NOT GGML_BACKEND_DL)
+        # these examples use the backends directly and cannot be built with dynamic loading
+        add_subdirectory(cvector-generator)
+        add_subdirectory(export-lora)
+        add_subdirectory(llava)
+        if (GGML_RPC)
+            add_subdirectory(rpc)
+        endif()
+    endif()
+endif()
tools/server/public/index.html.gz (new binary file)
(nine binary image files changed; sizes unchanged)