mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-05 23:41:45 +00:00
* devops: move s390x and ppc64le ci build
we have access to ubuntu-24.04-s390x and ppc64le images now
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: disable ppc64le for now since they have compiler errors
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: stop warnings as errors
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: switch to non-macro flag
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: going the llama macro route
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: add big-endian gguf test models
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: disable ppc64le to test s390x, check test build
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: dup .gguf.inp files for big-endian tests
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: dup .gguf.out files for big-endian too
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: add python setup and endian byteswap
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: poor thing does not have s390x python3
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: add missing rust compiler for s390x
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: try rust actions runner
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Revert "devops: try rust actions runner"
This reverts commit 3f8db04356033d6c1d7eccc75ca396bc5298250c.
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: try a different path for rust
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: dump home directory and user info
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: install gguf-py only
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: missed relative path
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: remove big-endian files since local swapping is working
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: revert test-tokenizer-0 cmakelists
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Fix unicode flags conversion from and to uint16_t
Bitfields are allocated in different order on s390x
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Simplify byteswap command
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Add byteswapping and git-lfs for test-tokenizers-ggml-vocabs
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Fix endianness detection in vocab loader
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Disable test-thread-safety on s390x
In this test a model is downloaded,
then immediately loaded to check if more downloads are needed,
and then used for test.
There is no clean way to separate all those steps
to add byteswapping between them, so just skip this test.
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Fix q8_0 test in test-quantize-fns
vec_signed uses unexpected rounding mode.
Explicitly use different rounding function.
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: add big-endian stories260K
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: add s390x test-eval-callback
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: fix test does not exist
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: fix model not found llama-eval-callback
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Fix q3_K dot product error in test-quantize-fns on s390x
Array q8bytes had only 4 elements allocated, but 8 elements accessed.
This led to out-of-bounds writes and later out-of-bounds reads of the overwritten values,
and an incorrect result.
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: re-enable ppc64le for testing
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: activate test-thread-safety for s390x
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: disable ppc64le tests
for some reason it keeps failing test-thread-safety tests and I do not
have a machine that is able to replicate the tests.
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* devops: LLAMA_FATAL_WARNINGS=ON
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Correct repository URL for s390x for test-thread-safety model
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Fix fs_get_cache_directory
Ensure it works even if both XDG_CACHE_HOME and HOME are unset.
This might happen in containers.
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Re-enable CI for ppc64le
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Fortify ggml_rope_impl
Only memcpy data from sections argument if it's non-NULL.
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
* Add TODO in struct unicode_cpt_flags to reimplement it in endian-independent way
* Update URL for big-endian model
* Update .github/workflows/build.yml
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update remaining mentions of BE models to ggml-org/models repo
---------
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
Co-authored-by: Aleksei Nikiforov <aleksei.nikiforov@linux.ibm.com>
Co-authored-by: Aleksei Nikiforov <103434461+AlekseiNikiforovIBM@users.noreply.github.com>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
228 lines
9.8 KiB
CMake
228 lines
9.8 KiB
CMake
llama_add_compile_flags()

# Builds one test source file into an installable executable target linked
# against `common`. The target name is taken from LLAMA_TEST_NAME when that
# variable is defined in the calling scope, otherwise from the source file
# name without its extension.
function(llama_build source)
    if (DEFINED LLAMA_TEST_NAME)
        set(TEST_TARGET ${LLAMA_TEST_NAME})
    else()
        get_filename_component(TEST_TARGET ${source} NAME_WE)
    endif()

    add_executable(${TEST_TARGET} ${source})
    target_link_libraries(${TEST_TARGET} PRIVATE common)
    install(TARGETS ${TEST_TARGET} RUNTIME)
endfunction()
|
|
|
|
# Registers a CTest test that runs an already-built executable target.
# Optional keyword arguments:
# - NAME: test name (defaults to the target name)
# - LABEL: CTest label (defaults to "main")
# - WORKING_DIRECTORY: directory the test runs in (defaults to ".")
# - ARGS: arguments passed to the executable
function(llama_test target)
    # cmake_parse_arguments has been a built-in command since CMake 3.5, so
    # including the CMakeParseArguments module is unnecessary. The PARSE_ARGV
    # signature is used because it handles empty and semicolon-containing
    # arguments correctly; "1" skips the positional `target` argument.
    set(options)
    set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
    set(multiValueArgs ARGS)
    cmake_parse_arguments(PARSE_ARGV 1 LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}")

    if (NOT DEFINED LLAMA_TEST_LABEL)
        set(LLAMA_TEST_LABEL "main")
    endif()
    if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
        set(LLAMA_TEST_WORKING_DIRECTORY .)
    endif()
    if (DEFINED LLAMA_TEST_NAME)
        set(TEST_NAME ${LLAMA_TEST_NAME})
    else()
        set(TEST_NAME ${target})
    endif()

    set(TEST_TARGET ${target})

    # $<TARGET_FILE:...> resolves to the built artifact path at generate time.
    add_test(
        NAME ${TEST_NAME}
        WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
        COMMAND $<TARGET_FILE:${TEST_TARGET}>
        ${LLAMA_TEST_ARGS})

    set_property(TEST ${TEST_NAME} PROPERTY LABELS ${LLAMA_TEST_LABEL})
endfunction()
|
|
|
|
# Registers a CTest test that runs an arbitrary command (e.g. a shell script),
# as opposed to llama_test which runs a built executable target.
# Optional keyword arguments:
# - NAME: test name (defaults to the command)
# - LABEL: CTest label (defaults to "main")
# - WORKING_DIRECTORY: directory the test runs in (defaults to ".")
# - ARGS: arguments passed to the command
function(llama_test_cmd target)
    # cmake_parse_arguments has been a built-in command since CMake 3.5, so
    # including the CMakeParseArguments module is unnecessary. The PARSE_ARGV
    # signature handles empty and semicolon-containing arguments correctly;
    # "1" skips the positional `target` argument.
    set(options)
    set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
    set(multiValueArgs ARGS)
    cmake_parse_arguments(PARSE_ARGV 1 LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}")

    if (NOT DEFINED LLAMA_TEST_LABEL)
        set(LLAMA_TEST_LABEL "main")
    endif()
    if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
        set(LLAMA_TEST_WORKING_DIRECTORY .)
    endif()
    if (DEFINED LLAMA_TEST_NAME)
        set(TEST_NAME ${LLAMA_TEST_NAME})
    else()
        set(TEST_NAME ${target})
    endif()

    # The command is used verbatim here — it is not a CMake target.
    add_test(
        NAME ${TEST_NAME}
        WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
        COMMAND ${target}
        ${LLAMA_TEST_ARGS})

    set_property(TEST ${TEST_NAME} PROPERTY LABELS ${LLAMA_TEST_LABEL})
endfunction()
|
|
|
|
# Builds and runs a test source file.
# Optional args:
# - NAME: name of the executable & test target (defaults to the source file name without extension)
# - LABEL: label for the test (defaults to main)
# - ARGS: arguments to pass to the test executable
# - WORKING_DIRECTORY
function(llama_build_and_test source)
    # cmake_parse_arguments has been a built-in command since CMake 3.5, so
    # including the CMakeParseArguments module is unnecessary. The PARSE_ARGV
    # signature handles empty and semicolon-containing arguments correctly;
    # "1" skips the positional `source` argument.
    set(options)
    set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
    set(multiValueArgs ARGS)
    cmake_parse_arguments(PARSE_ARGV 1 LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}")

    if (NOT DEFINED LLAMA_TEST_LABEL)
        set(LLAMA_TEST_LABEL "main")
    endif()
    if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
        set(LLAMA_TEST_WORKING_DIRECTORY .)
    endif()
    if (DEFINED LLAMA_TEST_NAME)
        set(TEST_TARGET ${LLAMA_TEST_NAME})
    else()
        get_filename_component(TEST_TARGET ${source} NAME_WE)
    endif()

    # get-model.cpp provides the shared model-download helper used by
    # model-dependent tests.
    add_executable(${TEST_TARGET} ${source} get-model.cpp)
    install(TARGETS ${TEST_TARGET} RUNTIME)
    target_link_libraries(${TEST_TARGET} PRIVATE common)

    add_test(
        NAME ${TEST_TARGET}
        WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
        COMMAND $<TARGET_FILE:${TEST_TARGET}>
        ${LLAMA_TEST_ARGS})

    set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL})
endfunction()
|
|
|
|
# build test-tokenizer-0 target once and add many tests
llama_build(test-tokenizer-0.cpp)

# One test per vocab file; each run feeds the tokenizer a different model vocab.
llama_test(test-tokenizer-0 NAME test-tokenizer-0-bert-bge       ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-bert-bge.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r      ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-command-r.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-coder.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm   ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-llm.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon         ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-falcon.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2          ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-2.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe      ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm      ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-spm.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-mpt            ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-mpt.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-phi-3          ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-phi-3.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2          ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-qwen2.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact         ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-refact.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder      ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-starcoder.gguf)
|
if (NOT WIN32)
    # Shell-script test: clones the vocab repo and runs the tokenizer against
    # it. Registered via llama_test_cmd since the entry point is a script,
    # not a CMake target.
    llama_test_cmd(
        ${CMAKE_CURRENT_SOURCE_DIR}/test-tokenizers-repo.sh
        NAME test-tokenizers-ggml-vocabs
        WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}
        ARGS https://huggingface.co/ggml-org/vocabs ${PROJECT_SOURCE_DIR}/models/ggml-vocabs
    )
endif()
|
|
|
|
if (LLAMA_LLGUIDANCE)
    # Only built when the optional llguidance integration is enabled.
    llama_build_and_test(test-grammar-llguidance.cpp ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf)
endif()
|
|
|
|
if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
    # these tests are disabled on Windows because they use internal functions not exported with LLAMA_API (when building with shared libraries)
    llama_build_and_test(test-sampling.cpp)
    llama_build_and_test(test-grammar-parser.cpp)
    llama_build_and_test(test-grammar-integration.cpp)
    llama_build_and_test(test-llama-grammar.cpp)
    llama_build_and_test(test-chat.cpp)
    # TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
    # NOTE: pass the variable name to if() directly — if() dereferences it
    # itself. The previous ${CMAKE_SYSTEM_PROCESSOR} form produces a syntax
    # error when the variable is empty.
    if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch64")
        llama_build_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
        target_include_directories(test-json-schema-to-grammar PRIVATE ${PROJECT_SOURCE_DIR}/tools/server)
    endif()

    if (NOT GGML_BACKEND_DL)
        llama_build(test-quantize-stats.cpp)
    endif()

    llama_build(test-gbnf-validator.cpp)

    # build test-tokenizer-1-bpe target once and add many tests
    llama_build(test-tokenizer-1-bpe.cpp)

    # TODO: disabled due to slowness
    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila    ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-aquila.gguf)
    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon    ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-falcon.gguf)
    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2     ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-2.gguf)
    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox  ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-neox.gguf)
    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf --ignore-merges)
    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt       ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-mpt.gguf)
    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact    ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-refact.gguf)
    #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-starcoder.gguf)

    # build test-tokenizer-1-spm target once and add many tests
    llama_build(test-tokenizer-1-spm.cpp)

    llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-spm.gguf)
    #llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-baichuan.gguf)

    # llama_build_and_test(test-double-float.cpp) # SLOW
endif()
|
|
|
|
# Cross-platform tests of common-library functionality.
llama_build_and_test(test-chat-parser.cpp)
llama_build_and_test(test-chat-template.cpp)
llama_build_and_test(test-json-partial.cpp)
llama_build_and_test(test-log.cpp)
llama_build_and_test(test-regex-partial.cpp)
|
|
# On big-endian s390x a big-endian (byteswapped) copy of the model is fetched
# instead of the default little-endian one.
# NOTE: pass the variable name to if() directly — if() dereferences it itself.
# The previous ${CMAKE_SYSTEM_PROCESSOR} form produces a syntax error when the
# variable is empty.
if (NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
    llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
else()
    llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-be.Q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
endif()
|
|
|
|
# this fails on windows (github hosted runner) due to curl DLL not found (exit code 0xc0000135)
if (NOT WIN32)
    llama_build_and_test(test-arg-parser.cpp)
endif()
|
|
|
|
if (NOT LLAMA_SANITIZE_ADDRESS)
    # TODO: repair known memory leaks
    llama_build_and_test(test-opt.cpp)
endif()

llama_build_and_test(test-gguf.cpp)
llama_build_and_test(test-backend-ops.cpp)
|
|
# Tests that require a downloaded model; tagged with the "model" CTest label
# so they can be selected or excluded as a group.
llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
llama_build_and_test(test-autorelease.cpp       LABEL "model")
|
|
if (NOT GGML_BACKEND_DL)
    # these tests use the backends directly and cannot be built with dynamic loading
    llama_build_and_test(test-barrier.cpp)
    llama_build_and_test(test-quantize-fns.cpp)
    llama_build_and_test(test-quantize-perf.cpp)
    llama_build_and_test(test-rope.cpp)
endif()
|
|
|
|
# libmtmd
# NOTE(review): LLAMA_TEST_NAME set at directory scope is re-read here to
# address the target after llama_build_and_test returns; it happens to match
# the name derived from the source file — confirm before renaming either.
set(LLAMA_TEST_NAME test-mtmd-c-api)
llama_build_and_test(test-mtmd-c-api.c)
target_link_libraries(${LLAMA_TEST_NAME} PRIVATE mtmd)
|
|
# dummy executable - not installed
# Compile-only check that the public llama.h header is valid C.
get_filename_component(TEST_TARGET test-c.c NAME_WE)
add_executable(${TEST_TARGET} test-c.c)
target_link_libraries(${TEST_TARGET} PRIVATE llama)
|
llama_build_and_test(test-alloc.cpp)
# Needs ggml's private headers, which are not part of the public include dirs.
target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)