From feff4aa8461da7c432d144c11da4802e41fef3cf Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 13 Sep 2024 14:23:11 +0200 Subject: [PATCH 1/8] server : add loading html page while model is loading (#9468) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adding loading page for '/' server requests * set content when model is loading * removed loading html file * updated cmakelist * updated makefile * cleaned up whitespace * cleanup for PR removed error * updated server test to handle 503 HTML * updated server test to handle 503 HTML * catch 503 before parsing json * revert test * account for both api and web browser requests * precommit corrections * eol fix * revert changes to pre-commit * removed print statement * made loading message more descriptive * also support .html files --------- Co-authored-by: VJHack Co-authored-by: Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com> --- Makefile | 1 + examples/server/CMakeLists.txt | 1 + examples/server/public/loading.html | 12 ++++++++++++ examples/server/server.cpp | 11 +++++++++-- 4 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 examples/server/public/loading.html diff --git a/Makefile b/Makefile index 8d3fd3ee8..f41887a4d 100644 --- a/Makefile +++ b/Makefile @@ -1440,6 +1440,7 @@ llama-server: \ examples/server/system-prompts.js.hpp \ examples/server/prompt-formats.js.hpp \ examples/server/json-schema-to-grammar.mjs.hpp \ + examples/server/loading.html.hpp \ common/json.hpp \ common/stb_image.h \ $(OBJ_ALL) diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index dbe41f1fd..580f3a824 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ -30,6 +30,7 @@ set(PUBLIC_ASSETS system-prompts.js prompt-formats.js json-schema-to-grammar.mjs + loading.html ) foreach(asset ${PUBLIC_ASSETS}) diff --git a/examples/server/public/loading.html b/examples/server/public/loading.html new file mode 
100644 index 000000000..c3fd19a0f --- /dev/null +++ b/examples/server/public/loading.html @@ -0,0 +1,12 @@ + + + + + + +
+ The model is loading. Please wait.
+ The user interface will appear soon. +
+ + diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 5e4dffadf..73cd6aae7 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -28,6 +28,7 @@ #include "system-prompts.js.hpp" #include "prompt-formats.js.hpp" #include "json-schema-to-grammar.mjs.hpp" +#include "loading.html.hpp" #include #include @@ -2592,10 +2593,16 @@ int main(int argc, char ** argv) { return false; }; - auto middleware_server_state = [&res_error, &state](const httplib::Request &, httplib::Response & res) { + auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) { server_state current_state = state.load(); if (current_state == SERVER_STATE_LOADING_MODEL) { - res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE)); + auto tmp = string_split(req.path, '.'); + if (req.path == "/" || tmp.back() == "html") { + res.set_content(reinterpret_cast(loading_html), loading_html_len, "text/html; charset=utf-8"); + res.status = 503; + } else { + res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE)); + } return false; } return true; From befaf1197fa447f61714de041828852a270659d2 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Sat, 14 Sep 2024 09:50:12 +0200 Subject: [PATCH 2/8] llama : make cell_id const in inp_s_mask block (#9470) This commit makes the cell_id variable const in the inp_s_mask block. The motivation for this change is consistency with the code in the inp_s_copy block. 
--- src/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index 65afcc84a..1986a90fb 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -15826,7 +15826,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_ubatch & batch) { // clear unused states for (int i = 0; i < n_kv; ++i) { - uint32_t cell_id = i + kv_self.head; + const uint32_t cell_id = i + kv_self.head; llama_kv_cell & kv_cell = lctx.kv_self.cells[cell_id]; data[i] = (float) (kv_cell.src >= 0); From 1f4111e540bacec8d00ca9fd96417bf4c1339394 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 14 Sep 2024 10:55:05 +0300 Subject: [PATCH 3/8] cmake : use list(APPEND ...) instead of set() + dedup linker (#9463) * cmake : use list(APPEND ...) instead of set() + dedup linker ggml-ci * cmake : try fix sycl * cmake : try to fix sycl 2 * cmake : fix sycl build (#9469) * try fix sycl build * use CMAKE_CXX_FLAGS as a string variable --------- Co-authored-by: Georgi Gerganov * one more CMAKE_CXX_FLAGS fix (#9471) --------- Co-authored-by: Michael Podvitskiy --- ggml/src/CMakeLists.txt | 77 ++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index cd2dcd066..506b6dc7b 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -26,6 +26,8 @@ if (NOT MSVC) endif() endif() +unset(GGML_EXTRA_LIBS) + if (APPLE AND GGML_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate) if (ACCELERATE_FRAMEWORK) @@ -35,7 +37,7 @@ if (APPLE AND GGML_ACCELERATE) add_compile_definitions(ACCELERATE_NEW_LAPACK) add_compile_definitions(ACCELERATE_LAPACK_ILP64) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) + list(APPEND GGML_EXTRA_LIBS ${ACCELERATE_FRAMEWORK}) else() message(WARNING "Accelerate framework not found") endif() @@ -87,7 +89,7 @@ if (GGML_METAL) COMMENT "Generate assembly for embedded Metal library" ) - set(GGML_SOURCES_METAL 
${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM}) + list(APPEND GGML_SOURCES_METAL ${METALLIB_EMBED_ASM}) else() if (GGML_METAL_SHADER_DEBUG) # custom command to do the following: @@ -132,7 +134,7 @@ if (GGML_METAL) ) endif() # GGML_METAL_EMBED_LIBRARY - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} + list(APPEND GGML_EXTRA_LIBS ${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK} @@ -157,11 +159,11 @@ if (GGML_OPENMP) add_compile_definitions(GGML_USE_OPENMP) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX) + list(APPEND GGML_EXTRA_LIBS OpenMP::OpenMP_C OpenMP::OpenMP_CXX) if (GGML_MUSA) - set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} "/usr/lib/llvm-10/include/openmp") - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} "/usr/lib/llvm-10/lib/libomp.so") + list(APPEND GGML_EXTRA_INCLUDES "/usr/lib/llvm-10/include/openmp") + list(APPEND GGML_EXTRA_LIBS "/usr/lib/llvm-10/lib/libomp.so") endif() else() message(WARNING "OpenMP not found") @@ -244,8 +246,8 @@ if (GGML_BLAS) set(GGML_HEADERS_BLAS ../include/ggml-blas.h) set(GGML_SOURCES_BLAS ggml-blas.cpp) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${BLAS_LIBRARIES}) - set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS}) + list(APPEND GGML_EXTRA_LIBS ${BLAS_LIBRARIES}) + list(APPEND GGML_EXTRA_INCLUDES ${BLAS_INCLUDE_DIRS}) else() message(WARNING "BLAS not found, please refer to " "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" @@ -368,19 +370,19 @@ if (GGML_CUDA) if (GGML_STATIC) if (WIN32) # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt) + list(APPEND GGML_EXTRA_LIBS CUDA::cudart_static CUDA::cublas CUDA::cublasLt) else () if (GGML_MUSA) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart_static MUSA::mublas_static) + list(APPEND GGML_EXTRA_LIBS MUSA::musart_static MUSA::mublas_static) else() - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} 
CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) + list(APPEND GGML_EXTRA_LIBS CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) endif() endif() else() if (GGML_MUSA) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musart MUSA::mublas) + list(APPEND GGML_EXTRA_LIBS MUSA::musart MUSA::mublas) else() - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) + list(APPEND GGML_EXTRA_LIBS CUDA::cudart CUDA::cublas CUDA::cublasLt) endif() endif() @@ -388,9 +390,9 @@ if (GGML_CUDA) # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so) else() if (GGML_MUSA) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ... + list(APPEND GGML_EXTRA_LIBS MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ... else() - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... + list(APPEND GGML_EXTRA_LIBS CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... 
endif() endif() else() @@ -495,7 +497,7 @@ if (GGML_HIPBLAS) if (CXX_IS_HIPCC) set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} hip::device) + list(APPEND GGML_EXTRA_LIBS hip::device) else() set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP) endif() @@ -504,7 +506,8 @@ if (GGML_HIPBLAS) message(FATAL_ERROR "Static linking not supported for HIP/ROCm") endif() - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas) + # TODO: this "PUBLIC" here seems wrong + list(APPEND GGML_EXTRA_LIBS PUBLIC hip::host roc::rocblas roc::hipblas) endif() if (GGML_SYCL) @@ -513,7 +516,8 @@ if (GGML_SYCL) endif() check_cxx_compiler_flag("-fsycl" SUPPORTS_SYCL) - if ( DEFINED ENV{ONEAPI_ROOT}) + + if (DEFINED ENV{ONEAPI_ROOT}) message(STATUS "Using oneAPI Release SYCL compiler (icpx).") elseif(SUPPORTS_SYCL) message(WARNING "Using open-source SYCL compiler (clang++). Didn't detect ENV {ONEAPI_ROOT}. 
@@ -551,21 +555,27 @@ if (GGML_SYCL) find_package(DNNL) message("-- DNNL found:" ${DNNL_FOUND}) + if (GGML_SYCL_TARGET STREQUAL "INTEL") add_compile_definitions(GGML_SYCL_DNNL=${DNNL_FOUND}) else() add_compile_definitions(GGML_SYCL_DNNL=0) endif() + + if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL") + list(APPEND GGML_EXTRA_LIBS DNNL::dnnl) + endif() + if (WIN32) find_package(IntelSYCL REQUIRED) find_package(MKL REQUIRED) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL) + list(APPEND GGML_EXTRA_LIBS IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL) else() if (GGML_SYCL_TARGET STREQUAL "INTEL") - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) + list(APPEND GGML_EXTRA_LIBS OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl pthread m dl onemkl) + list(APPEND GGML_EXTRA_LIBS pthread m dl onemkl) endif() endif() if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL") @@ -579,7 +589,7 @@ if (GGML_RPC) list(APPEND GGML_CDEF_PUBLIC GGML_USE_RPC) if (WIN32) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ws2_32) + list(APPEND GGML_EXTRA_LIBS ws2_32) endif() set(GGML_HEADERS_RPC ../include/ggml-rpc.h) @@ -657,8 +667,8 @@ if (GGML_VULKAN) set(GGML_HEADERS_VULKAN ${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml-vulkan.h ${_ggml_vk_header}) set(GGML_SOURCES_VULKAN ggml-vulkan.cpp ${_ggml_vk_source}) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} Vulkan::Vulkan) - set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR}) + list(APPEND GGML_EXTRA_LIBS Vulkan::Vulkan) + list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) else() message(WARNING "Vulkan not found") endif() @@ -817,8 +827,8 @@ if (GGML_KOMPUTE) list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE) - 
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} kompute) - set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR}) + list(APPEND GGML_EXTRA_LIBS kompute) + list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) else() message(WARNING "Kompute not found") endif() @@ -883,9 +893,10 @@ if (GGML_CANN) message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}") message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}") - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${CANN_LIBRARIES} ) - set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS}) - set(GGML_EXTRA_LIBDIRS ${GGML_EXTRA_LIBDIRS} ${CANN_INSTALL_DIR}/lib64) + list(APPEND GGML_EXTRA_LIBS ${CANN_LIBRARIES} ) + list(APPEND GGML_EXTRA_INCLUDES ${CANN_INCLUDE_DIRS}) + list(APPEND GGML_EXTRA_LIBDIRS ${CANN_INSTALL_DIR}/lib64) + list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN) endif() else() @@ -1322,12 +1333,14 @@ if (EMSCRIPTEN) set_target_properties(ggml PROPERTIES COMPILE_FLAGS "-msimd128") endif() -target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC}) -target_include_directories(ggml PUBLIC ../include) +target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC}) +target_include_directories(ggml PUBLIC ../include) target_include_directories(ggml PRIVATE . 
${GGML_EXTRA_INCLUDES}) -target_link_directories(ggml PRIVATE ${GGML_EXTRA_LIBDIRS}) +target_link_directories (ggml PRIVATE ${GGML_EXTRA_LIBDIRS}) target_compile_features (ggml PRIVATE c_std_11) # don't bump +list(REMOVE_DUPLICATES GGML_EXTRA_LIBS) + target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS}) find_library(MATH_LIBRARY m) From dcdcee3a744f39714503ee2b19c49b7c7b6209c9 Mon Sep 17 00:00:00 2001 From: VoidIsVoid <343750470@qq.com> Date: Sat, 14 Sep 2024 17:36:44 +0800 Subject: [PATCH 4/8] server: add data: [DONE] to /chat/completions stream response (#9459) --- examples/server/server.cpp | 2 ++ examples/server/tests/features/steps/steps.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 73cd6aae7..14c4af3d9 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2993,6 +2993,8 @@ int main(int argc, char ** argv) { }, [&](json error_data) { server_sent_event(sink, "error", error_data); }); + static const std::string ev_done = "data: [DONE]\n\n"; + sink.write(ev_done.data(), ev_done.size()); sink.done(); return true; }; diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 11587dd64..0f4249b13 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -1020,6 +1020,8 @@ async def oai_chat_completions(user_prompt, event_data = line.split(': ', 1) assert event_data[0] == 'data', f'Bad event code received: ```{event_data}```' chunk_raw = event_data[1] + if chunk_raw == '[DONE]': + break chunk = json.loads(chunk_raw) assert len(chunk['choices']) == 1, f"no choices provided, line ```{line}```" From 822b6322dea704110797a5671fc80ae39ee6ac97 Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Sat, 14 Sep 2024 05:54:37 -0400 Subject: [PATCH 5/8] ggml : ggml_type_name return "NONE" for invalid values (#9458) When running on Windows, the quantization 
utility attempts to print the types that are not set which leads to a crash. --- ggml/src/ggml.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 493ff7fc0..490c8d602 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3399,7 +3399,7 @@ double ggml_type_sizef(enum ggml_type type) { } GGML_CALL const char * ggml_type_name(enum ggml_type type) { - return type_traits[type].type_name; + return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE"; } GGML_CALL bool ggml_is_quantized(enum ggml_type type) { From 7596487bebd58eade3cd0133d42a9008aaaf9d09 Mon Sep 17 00:00:00 2001 From: Michael Podvitskiy Date: Sun, 15 Sep 2024 09:06:38 +0200 Subject: [PATCH 6/8] cmake : try to fix sycl+intel build (#9487) --- ggml/src/CMakeLists.txt | 77 +++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 506b6dc7b..b25440769 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -26,7 +26,8 @@ if (NOT MSVC) endif() endif() -unset(GGML_EXTRA_LIBS) +unset(GGML_EXTRA_LIBS_PRIVATE) +unset(GGML_EXTRA_LIBS_PUBLIC) if (APPLE AND GGML_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate) @@ -37,7 +38,7 @@ if (APPLE AND GGML_ACCELERATE) add_compile_definitions(ACCELERATE_NEW_LAPACK) add_compile_definitions(ACCELERATE_LAPACK_ILP64) - list(APPEND GGML_EXTRA_LIBS ${ACCELERATE_FRAMEWORK}) + list(APPEND GGML_EXTRA_LIBS_PRIVATE ${ACCELERATE_FRAMEWORK}) else() message(WARNING "Accelerate framework not found") endif() @@ -89,7 +90,7 @@ if (GGML_METAL) COMMENT "Generate assembly for embedded Metal library" ) - list(APPEND GGML_SOURCES_METAL ${METALLIB_EMBED_ASM}) + list(APPEND GGML_SOURCES_METAL ${METALLIB_EMBED_ASM}) else() if (GGML_METAL_SHADER_DEBUG) # custom command to do the following: @@ -134,7 +135,7 @@ if (GGML_METAL) ) endif() # GGML_METAL_EMBED_LIBRARY - list(APPEND GGML_EXTRA_LIBS + list(APPEND 
GGML_EXTRA_LIBS_PRIVATE ${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK} @@ -159,11 +160,11 @@ if (GGML_OPENMP) add_compile_definitions(GGML_USE_OPENMP) - list(APPEND GGML_EXTRA_LIBS OpenMP::OpenMP_C OpenMP::OpenMP_CXX) + list(APPEND GGML_EXTRA_LIBS_PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX) if (GGML_MUSA) - list(APPEND GGML_EXTRA_INCLUDES "/usr/lib/llvm-10/include/openmp") - list(APPEND GGML_EXTRA_LIBS "/usr/lib/llvm-10/lib/libomp.so") + list(APPEND GGML_EXTRA_INCLUDES "/usr/lib/llvm-10/include/openmp") + list(APPEND GGML_EXTRA_LIBS_PRIVATE "/usr/lib/llvm-10/lib/libomp.so") endif() else() message(WARNING "OpenMP not found") @@ -246,8 +247,8 @@ if (GGML_BLAS) set(GGML_HEADERS_BLAS ../include/ggml-blas.h) set(GGML_SOURCES_BLAS ggml-blas.cpp) - list(APPEND GGML_EXTRA_LIBS ${BLAS_LIBRARIES}) - list(APPEND GGML_EXTRA_INCLUDES ${BLAS_INCLUDE_DIRS}) + list(APPEND GGML_EXTRA_LIBS_PRIVATE ${BLAS_LIBRARIES}) + list(APPEND GGML_EXTRA_INCLUDES ${BLAS_INCLUDE_DIRS}) else() message(WARNING "BLAS not found, please refer to " "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" @@ -370,19 +371,19 @@ if (GGML_CUDA) if (GGML_STATIC) if (WIN32) # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library - list(APPEND GGML_EXTRA_LIBS CUDA::cudart_static CUDA::cublas CUDA::cublasLt) + list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt) else () if (GGML_MUSA) - list(APPEND GGML_EXTRA_LIBS MUSA::musart_static MUSA::mublas_static) + list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart_static MUSA::mublas_static) else() - list(APPEND GGML_EXTRA_LIBS CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) + list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) endif() endif() else() if (GGML_MUSA) - list(APPEND GGML_EXTRA_LIBS MUSA::musart MUSA::mublas) + list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musart MUSA::mublas) else() - list(APPEND 
GGML_EXTRA_LIBS CUDA::cudart CUDA::cublas CUDA::cublasLt) + list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt) endif() endif() @@ -390,9 +391,9 @@ if (GGML_CUDA) # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so) else() if (GGML_MUSA) - list(APPEND GGML_EXTRA_LIBS MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ... + list(APPEND GGML_EXTRA_LIBS_PRIVATE MUSA::musa_driver) # required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ... else() - list(APPEND GGML_EXTRA_LIBS CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... + list(APPEND GGML_EXTRA_LIBS_PRIVATE CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... endif() endif() else() @@ -497,7 +498,7 @@ if (GGML_HIPBLAS) if (CXX_IS_HIPCC) set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX) - list(APPEND GGML_EXTRA_LIBS hip::device) + list(APPEND GGML_EXTRA_LIBS_PRIVATE hip::device) else() set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP) endif() @@ -506,8 +507,7 @@ if (GGML_HIPBLAS) message(FATAL_ERROR "Static linking not supported for HIP/ROCm") endif() - # TODO: this "PUBLIC" here seems wrong - list(APPEND GGML_EXTRA_LIBS PUBLIC hip::host roc::rocblas roc::hipblas) + list(APPEND GGML_EXTRA_LIBS_PUBLIC hip::host roc::rocblas roc::hipblas) endif() if (GGML_SYCL) @@ -563,24 +563,23 @@ if (GGML_SYCL) endif() if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL") - list(APPEND GGML_EXTRA_LIBS DNNL::dnnl) + list(APPEND GGML_EXTRA_LIBS_PRIVATE DNNL::dnnl) endif() if (WIN32) find_package(IntelSYCL REQUIRED) find_package(MKL REQUIRED) - list(APPEND GGML_EXTRA_LIBS IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL) + list(APPEND GGML_EXTRA_LIBS_PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL) else() if (GGML_SYCL_TARGET STREQUAL "INTEL") - list(APPEND GGML_EXTRA_LIBS OpenCL 
mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsycl") + list(APPEND GGML_EXTRA_LIBS_PRIVATE OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") - list(APPEND GGML_EXTRA_LIBS pthread m dl onemkl) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsycl") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") + list(APPEND GGML_EXTRA_LIBS_PRIVATE pthread m dl onemkl) endif() endif() - if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL") - list(APPEND GGML_EXTRA_LIBS DNNL::dnnl) - endif() endif() if (GGML_RPC) @@ -589,7 +588,7 @@ if (GGML_RPC) list(APPEND GGML_CDEF_PUBLIC GGML_USE_RPC) if (WIN32) - list(APPEND GGML_EXTRA_LIBS ws2_32) + list(APPEND GGML_EXTRA_LIBS_PRIVATE ws2_32) endif() set(GGML_HEADERS_RPC ../include/ggml-rpc.h) @@ -667,8 +666,8 @@ if (GGML_VULKAN) set(GGML_HEADERS_VULKAN ${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml-vulkan.h ${_ggml_vk_header}) set(GGML_SOURCES_VULKAN ggml-vulkan.cpp ${_ggml_vk_source}) - list(APPEND GGML_EXTRA_LIBS Vulkan::Vulkan) - list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) + list(APPEND GGML_EXTRA_LIBS_PRIVATE Vulkan::Vulkan) + list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) else() message(WARNING "Vulkan not found") endif() @@ -827,8 +826,8 @@ if (GGML_KOMPUTE) list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE) - list(APPEND GGML_EXTRA_LIBS kompute) - list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) + list(APPEND GGML_EXTRA_LIBS_PRIVATE kompute) + list(APPEND GGML_EXTRA_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) else() message(WARNING "Kompute not found") endif() @@ -893,9 +892,9 @@ if (GGML_CANN) message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}") message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}") - list(APPEND 
GGML_EXTRA_LIBS ${CANN_LIBRARIES} ) - list(APPEND GGML_EXTRA_INCLUDES ${CANN_INCLUDE_DIRS}) - list(APPEND GGML_EXTRA_LIBDIRS ${CANN_INSTALL_DIR}/lib64) + list(APPEND GGML_EXTRA_LIBS_PRIVATE ${CANN_LIBRARIES} ) + list(APPEND GGML_EXTRA_INCLUDES ${CANN_INCLUDE_DIRS}) + list(APPEND GGML_EXTRA_LIBDIRS ${CANN_INSTALL_DIR}/lib64) list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN) endif() @@ -1339,9 +1338,7 @@ target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES}) target_link_directories (ggml PRIVATE ${GGML_EXTRA_LIBDIRS}) target_compile_features (ggml PRIVATE c_std_11) # don't bump -list(REMOVE_DUPLICATES GGML_EXTRA_LIBS) - -target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS}) +list(APPEND GGML_EXTRA_LIBS_PRIVATE Threads::Threads) find_library(MATH_LIBRARY m) if (MATH_LIBRARY) @@ -1350,6 +1347,10 @@ if (MATH_LIBRARY) endif() endif() +list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PRIVATE) +list(REMOVE_DUPLICATES GGML_EXTRA_LIBS_PUBLIC) +target_link_libraries(ggml PRIVATE ${GGML_EXTRA_LIBS_PRIVATE} PUBLIC ${GGML_EXTRA_LIBS_PUBLIC}) + if (BUILD_SHARED_LIBS) set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) target_compile_definitions(ggml PRIVATE GGML_SHARED GGML_BUILD) From d6b37c881f056bd32b681dcd7658a37ea6ec3a1e Mon Sep 17 00:00:00 2001 From: OSecret <135510162+OLSecret@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:36:53 +0300 Subject: [PATCH 7/8] readme : update tools list (#9475) * Added link to proprietary wrapper for Unity3d into README.md Wrapper has prebuild library and was tested on iOS, Android, WebGL, PC, Mac platforms, has online demos like [this](https://d23myu0xfn2ttc.cloudfront.net/rich/index.html) and [that](https://d23myu0xfn2ttc.cloudfront.net/). 
* Update README.md Fixes upon review --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 73041b1a2..9a10ead83 100644 --- a/README.md +++ b/README.md @@ -173,6 +173,7 @@ Unless otherwise noted these projects are open-source with permissive licensing: - [akx/ggify](https://github.com/akx/ggify) – download PyTorch models from HuggingFace Hub and convert them to GGML - [crashr/gppm](https://github.com/crashr/gppm) – launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption - [gpustack/gguf-parser](https://github.com/gpustack/gguf-parser-go/tree/main/cmd/gguf-parser) - review/check the GGUF file and estimate the memory usage +- [Styled Lines](https://marketplace.unity.com/packages/tools/generative-ai/styled-lines-llama-cpp-model-292902) (proprietary licensed, async wrapper of inference part for game development in Unity3d with prebuild Mobile and Web platform wrappers and a model example) **Infrastructure:** From 3c7989fd29a2db2b75e28fd708cc441febe99a82 Mon Sep 17 00:00:00 2001 From: Csaba Kecskemeti Date: Sun, 15 Sep 2024 00:48:25 -0700 Subject: [PATCH 8/8] py : add "LLaMAForCausalLM" conversion support (#9485) Co-authored-by: Csaba Kecskemeti --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 01a8a50a2..2c6d5d95b 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1487,7 +1487,7 @@ class StableLMModel(Model): raise ValueError(f"Unprocessed norms: {norms}") -@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM") +@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM") class LlamaModel(Model): model_arch = gguf.MODEL_ARCH.LLAMA