Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 09:34:37 +00:00)

Commit 9c10486204 — merge the file structure refactor, testing

315 changed files with 124 additions and 568 deletions
CMakeLists.txt

@@ -49,7 +49,7 @@ option(LLAMA_CUDA_F16 "llama: use 16 bit floats for dmmv
 set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
 set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
     "llama: max. batch size for using peer access")
-set(GGML_CUDA_USE_GRAPHS ON)
+set(GGML_CUDA_USE_GRAPHS OFF)
 option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)

 # Other
@@ -69,16 +69,20 @@ find_package(Threads REQUIRED)
 add_compile_definitions(LOG_DISABLE_LOGS)
 add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})

-file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
+file(GLOB GGML_SOURCES_CUDA "ggml/src/ggml-cuda/*.cu")
-list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")
+list(APPEND GGML_SOURCES_CUDA "ggml/src/ggml-cuda.cu")
-file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu")
 list(APPEND GGML_SOURCES_CUDA ${SRCS})
-file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
 list(APPEND GGML_SOURCES_CUDA ${SRCS})
 set(GGML_V3_CUDA_SOURCES otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h)
 set(GGML_V2_CUDA_SOURCES otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h)
 set(GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)

+if (GGML_CUDA_USE_GRAPHS)
+    add_compile_definitions(GGML_CUDA_USE_GRAPHS)
+endif()

 if (LLAMA_CUBLAS)
 cmake_minimum_required(VERSION 3.17)

@@ -102,11 +106,11 @@ if (LLAMA_CUBLAS)
 add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE})

 # only build minimal quants required for fattn quant kv
-file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
 list(APPEND GGML_SOURCES_CUDA ${SRCS})
-file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
 list(APPEND GGML_SOURCES_CUDA ${SRCS})
-file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
 list(APPEND GGML_SOURCES_CUDA ${SRCS})

 if (LLAMA_STATIC)
@@ -167,11 +171,11 @@ if (LLAMA_HIPBLAS)

 if (${hipblas_FOUND} AND ${hip_FOUND})
 message(STATUS "HIP and hipBLAS found")
-file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu")
+file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu")
-list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")
+list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda.cu")
-file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu")
 list(APPEND GGML_SOURCES_ROCM ${SRCS})
-file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
 list(APPEND GGML_SOURCES_ROCM ${SRCS})
 add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA SD_USE_CUBLAS)
 add_library(ggml-rocm ${GGML_SOURCES_CUDA})
@@ -179,11 +183,11 @@ if (LLAMA_HIPBLAS)
 target_compile_definitions(ggml-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
 endif()

-file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
 list(APPEND GGML_SOURCES_ROCM ${SRCS})
-file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
 list(APPEND GGML_SOURCES_ROCM ${SRCS})
-file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
 list(APPEND GGML_SOURCES_ROCM ${SRCS})

 # only build minimal quants required for fattn quant kv
@@ -418,18 +422,18 @@ endif()
 #

 add_library(ggml
-            ggml.c
+            ggml/src/ggml.c
-            ggml.h
+            ggml/include/ggml.h
-            ggml-alloc.c
+            ggml/src/ggml-alloc.c
-            ggml-alloc.h
+            ggml/include/ggml-alloc.h
-            ggml-backend.c
+            ggml/src/ggml-backend.c
-            ggml-backend.h
+            ggml/include/ggml-backend.h
-            ggml-quants.c
+            ggml/src/ggml-quants.c
-            ggml-quants.h
+            ggml/src/ggml-quants.h
-            sgemm.cpp
+            ggml/src/sgemm.cpp
-            sgemm.h
+            ggml/src/sgemm.h
             ${GGML_SOURCES_CUDA})
-target_include_directories(ggml PUBLIC . ./otherarch ./otherarch/tools)
+target_include_directories(ggml PUBLIC . ./include ./otherarch ./otherarch/tools)
 target_compile_features(ggml PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
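As a quick sanity check of the relocated layout this commit assumes, every path referenced above (and in the Makefile hunks further down) should now exist. A minimal sketch, run from the repository root, with the paths taken from this diff itself:

```bash
# ls exits non-zero if any of the moved files is missing.
ls ggml/src/ggml.c ggml/include/ggml.h ggml/src/ggml-quants.c \
   ggml/src/sgemm.cpp src/unicode.cpp src/llama.cpp include/llama.h
```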
@@ -437,7 +441,7 @@ set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
 add_library(ggml_v1
             otherarch/ggml_v1.c
             otherarch/ggml_v1.h)
-target_include_directories(ggml_v1 PUBLIC . ./otherarch ./otherarch/tools)
+target_include_directories(ggml_v1 PUBLIC . ./include ./otherarch ./otherarch/tools)
 target_compile_features(ggml_v1 PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml_v1 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml_v1 PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -447,7 +451,7 @@ add_library(ggml_v2
             otherarch/ggml_v2.h
             ${GGML_V2_CUDA_SOURCES}
             ${GGML_V2_LEGACY_CUDA_SOURCES})
-target_include_directories(ggml_v2 PUBLIC . ./otherarch ./otherarch/tools)
+target_include_directories(ggml_v2 PUBLIC . ./include ./otherarch ./otherarch/tools)
 target_compile_features(ggml_v2 PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml_v2 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml_v2 PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -456,7 +460,7 @@ add_library(ggml_v3
             otherarch/ggml_v3.c
             otherarch/ggml_v3.h
             ${GGML_V3_CUDA_SOURCES})
-target_include_directories(ggml_v3 PUBLIC . ./otherarch ./otherarch/tools)
+target_include_directories(ggml_v3 PUBLIC . ./include ./otherarch ./otherarch/tools)
 target_compile_features(ggml_v3 PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml_v3 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml_v3 PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -472,31 +476,31 @@ add_library(common2
             examples/llava/llava.h
             examples/llava/clip.cpp
             examples/llava/clip.h
-            unicode.h
+            src/unicode.h
-            unicode.cpp
+            src/unicode.cpp
-            unicode-data.cpp)
+            src/unicode-data.cpp)
-target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(common2 PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
 target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)

 add_library(sdtype_adapter
             otherarch/sdcpp/sdtype_adapter.cpp)
-target_include_directories(sdtype_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(sdtype_adapter PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(sdtype_adapter PUBLIC cxx_std_11) # don't bump
 target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

 add_library(whisper_adapter
             otherarch/whispercpp/whisper_adapter.cpp)
-target_include_directories(whisper_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
+target_include_directories(whisper_adapter PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
 target_compile_features(whisper_adapter PUBLIC cxx_std_11) # don't bump
 target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

 add_library(gpttype_adapter
             gpttype_adapter.cpp)
-target_include_directories(gpttype_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(gpttype_adapter PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
 target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -504,7 +508,7 @@ set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 if (LLAMA_CUBLAS)
 set(TARGET koboldcpp_cublas)
 add_library(${TARGET} SHARED expose.cpp expose.h)
-target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(${TARGET} PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
 set_target_properties(${TARGET} PROPERTIES PREFIX "")
 set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
@@ -516,7 +520,7 @@ endif()
 if (LLAMA_HIPBLAS)
 set(TARGET koboldcpp_hipblas)
 add_library(${TARGET} SHARED expose.cpp expose.h)
-target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(${TARGET} PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
 set_target_properties(${TARGET} PROPERTIES PREFIX "")
 set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
Makefile (82 changed lines)
@@ -42,8 +42,8 @@ endif
 #

 # keep standard at C11 and C++11
-CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE
+CFLAGS = -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE
-CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE
+CXXFLAGS = -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE
 LDFLAGS =
 FASTCFLAGS = $(subst -O3,-Ofast,$(CFLAGS))
 FASTCXXFLAGS = $(subst -O3,-Ofast,$(CXXFLAGS))
@@ -150,17 +150,17 @@ ifndef LLAMA_NO_ACCELERATE
 endif

 # it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
-OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
+OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
-OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
+OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
-OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
+OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
-OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
+OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
-OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
+OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))

 ifdef LLAMA_CUBLAS
 CUBLAS_FLAGS = -DGGML_USE_CUDA -DSD_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
 CUBLASLD_FLAGS = -lcuda -lcublas -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/local/cuda/targets/sbsa-linux/lib -L/usr/lib/wsl/lib
 CUBLAS_OBJS = ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
-CUBLAS_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
+CUBLAS_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
 CUBLAS_OBJS += $(OBJS_CUDA_TEMP_INST)
 NVCC = nvcc
 NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
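For readers unfamiliar with `patsubst`: the five assignments above derive one `.o` path per template-instance `.cu` file. A minimal shell equivalent of that mapping, assuming the refactored tree is checked out:

```bash
# Prints the object name Make derives for each CUDA template instance:
# strip the .cu suffix and append .o — the same substitution as patsubst %.cu,%.o.
for f in ggml/src/ggml-cuda/template-instances/*.cu; do
  echo "${f%.cu}.o"
done
```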
@@ -214,9 +214,9 @@ ifdef LLAMA_CUDA_CCBIN
 NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
 endif

-ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
+ggml-cuda/%.o: ggml/src/ggml-cuda/%.cu ggml/include/ggml.h ggml/src/ggml-common.h ggml/src/ggml-cuda/common.cuh
 	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
-ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
+ggml-cuda.o: ggml/src/ggml-cuda.cu ggml/include/ggml-cuda.h ggml/include/ggml.h ggml/include/ggml-backend.h ggml/src/ggml-backend-impl.h ggml/src/ggml-common.h $(wildcard ggml/src/ggml-cuda/*.cuh)
 	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
@@ -244,7 +244,7 @@ ifdef LLAMA_HIPBLAS
 HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA -DSD_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
 HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
 HIP_OBJS += ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
-HIP_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
+HIP_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
 HIP_OBJS += $(OBJS_CUDA_TEMP_INST)

 HIPFLAGS2 += $(addprefix --offload-arch=,$(GPU_TARGETS))
@@ -252,9 +252,9 @@ ifdef LLAMA_HIPBLAS
 HIPFLAGS2 += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
 HIPFLAGS2 += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)

-ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
+ggml-cuda/%.o: ggml/src/ggml-cuda/%.cu ggml/include/ggml.h ggml/src/ggml-common.h ggml/src/ggml-cuda/common.cuh
 	$(HCXX) $(CXXFLAGS) $(HIPFLAGS) $(HIPFLAGS2) -x hip -c -o $@ $<
-ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
+ggml-cuda.o: ggml/src/ggml-cuda.cu ggml/include/ggml-cuda.h ggml/include/ggml.h ggml/include/ggml-backend.h ggml/src/ggml-backend-impl.h ggml/src/ggml-common.h $(wildcard ggml/src/ggml-cuda/*.cuh)
 	$(HCXX) $(CXXFLAGS) $(HIPFLAGS) $(HIPFLAGS2) -x hip -c -o $@ $<
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
 	$(HCXX) $(CXXFLAGS) $(HIPFLAGS) $(HIPFLAGS2) -x hip -c -o $@ $<
@@ -273,7 +273,7 @@ ifdef LLAMA_METAL

 ggml-metal.o: ggml-metal.m ggml-metal.h
 	@echo "== Preparing merged Metal file =="
-	@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-merged.metal
+	@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-merged.metal
 	$(CC) $(CFLAGS) -c $< -o $@
 endif # LLAMA_METAL

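The `sed` recipe above splices `ggml-common.h` into the Metal shader before compilation: the `r` command appends the header's contents after the matching `#include` line, and the second expression then `d`eletes the `#include` itself. A self-contained toy demonstration of the same idiom, with hypothetical file names:

```bash
printf '#include "ggml-common.h"\nkernel body\n' > demo.metal
printf '// common content\n' > ggml-common.h
sed -e '/#include "ggml-common.h"/r ggml-common.h' \
    -e '/#include "ggml-common.h"/d' demo.metal
# Output: '// common content' then 'kernel body' — the include line is replaced inline.
```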
@@ -392,57 +392,57 @@ $(info )
 # Build library
 #

-ggml.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml_v4_openblas.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_openblas.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
-ggml_v4_failsafe.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_failsafe.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@
-ggml_v4_noavx2.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) -c $< -o $@
-ggml_v4_clblast.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_clblast.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
-ggml_v4_cublas.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
-ggml_v4_clblast_noavx2.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
-ggml_v4_vulkan.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@
-ggml_v4_vulkan_noavx2.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_vulkan_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(VULKAN_FLAGS) -c $< -o $@

 #quants
-ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-cuda.h ggml-common.h
+ggml-quants.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
 	$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml-quants_noavx2.o: ggml-quants.c ggml.h ggml-quants.h ggml-cuda.h ggml-common.h
+ggml-quants_noavx2.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
 	$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
-ggml-quants_failsafe.o: ggml-quants.c ggml.h ggml-quants.h ggml-cuda.h ggml-common.h
+ggml-quants_failsafe.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
 	$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@

 #sgemm
-sgemm.o: sgemm.cpp sgemm.h ggml.h
+sgemm.o: ggml/src/sgemm.cpp ggml/src/sgemm.h ggml/include/ggml.h
 	$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
-sgemm_noavx2.o: sgemm.cpp sgemm.h ggml.h
+sgemm_noavx2.o: ggml/src/sgemm.cpp ggml/src/sgemm.h ggml/include/ggml.h
 	$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
-sgemm_failsafe.o: sgemm.cpp sgemm.h ggml.h
+sgemm_failsafe.o: ggml/src/sgemm.cpp ggml/src/sgemm.h ggml/include/ggml.h
 	$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@

 #there's no intrinsics or special gpu ops used here, so we can have a universal object
-ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
+ggml-alloc.o: ggml/src/ggml-alloc.c ggml/include/ggml.h ggml/include/ggml-alloc.h
 	$(CC) $(CFLAGS) -c $< -o $@
 llava.o: examples/llava/llava.cpp examples/llava/llava.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-unicode.o: unicode.cpp unicode.h
+unicode.o: src/unicode.cpp src/unicode.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-unicode-data.o: unicode-data.cpp unicode-data.h
+unicode-data.o: src/unicode-data.cpp src/unicode-data.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 #these have special gpu defines
-ggml-backend_default.o: ggml-backend.c ggml.h ggml-backend.h
+ggml-backend_default.o: ggml/src/ggml-backend.c ggml/include/ggml.h ggml/include/ggml-backend.h
 	$(CC) $(CFLAGS) -c $< -o $@
-ggml-backend_vulkan.o: ggml-backend.c ggml.h ggml-backend.h
+ggml-backend_vulkan.o: ggml/src/ggml-backend.c ggml/include/ggml.h ggml/include/ggml-backend.h
 	$(CC) $(CFLAGS) $(VULKAN_FLAGS) -c $< -o $@
-ggml-backend_cublas.o: ggml-backend.c ggml.h ggml-backend.h
+ggml-backend_cublas.o: ggml/src/ggml-backend.c ggml/include/ggml.h ggml/include/ggml-backend.h
 	$(CC) $(CFLAGS) $(CUBLAS_FLAGS) -c $< -o $@
 llavaclip_default.o: examples/llava/clip.cpp examples/llava/clip.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
@@ -450,7 +450,7 @@ llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@

 #this is only used for openblas and accelerate
-ggml-blas.o: ggml-blas.cpp ggml-blas.h
+ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 #version 3 libs
@@ -502,11 +502,11 @@ ggml_v3-opencl.o: otherarch/ggml_v3-opencl.cpp otherarch/ggml_v3-opencl.h
 	$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@

 #vulkan
-ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
+ggml-vulkan.o: ggml/src/ggml-vulkan.cpp ggml/include/ggml-vulkan.h
 	$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@

 # intermediate objects
-llama.o: llama.cpp ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h otherarch/llama-util.h
+llama.o: src/llama.cpp ggml/include/ggml.h ggml/include/ggml-alloc.h ggml/include/ggml-backend.h ggml/include/ggml-cuda.h ggml/include/ggml-metal.h include/llama.h otherarch/llama-util.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 common.o: common/common.cpp common/common.h common/log.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
@@ -532,7 +532,7 @@ whispercpp_cublas.o: otherarch/whispercpp/whisper_adapter.cpp
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@

 # idiotic "for easier compilation"
-GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp llama.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml.h ggml-cuda.h llama.h otherarch/llama-util.h
+GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp src/llama.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml/include/ggml.h ggml/include/ggml-cuda.h include/llama.h otherarch/llama-util.h
 gpttype_adapter_failsafe.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) -c $< -o $@
 gpttype_adapter.o: $(GPTTYPE_ADAPTER)
@@ -552,8 +552,8 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)

 clean:
 	rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
-	rm -vrf ggml-cuda/*.o
+	rm -vrf ggml/src/ggml-cuda/*.o
-	rm -vrf ggml-cuda/template-instances/*.o
+	rm -vrf ggml/src/ggml-cuda/template-instances/*.o

 # useful tools
 main: examples/main/main.cpp build-info.h ggml.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FULL) $(OBJS)
cmake/git-vars.cmake (new file, 22 lines)
@@ -0,0 +1,22 @@
+find_package(Git)
+
+# the commit's SHA1
+execute_process(COMMAND
+  "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
+  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  OUTPUT_VARIABLE GIT_SHA1
+  ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# the date of the commit
+execute_process(COMMAND
+  "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
+  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  OUTPUT_VARIABLE GIT_DATE
+  ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# the subject of the commit
+execute_process(COMMAND
+  "${GIT_EXECUTABLE}" log -1 --format=%s
+  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
+  ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
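For reference, the three values this new script captures correspond to the following plain git commands (run inside the source tree):

```bash
git describe --match=NeVeRmAtCh --always --abbrev=8  # GIT_SHA1: the pattern never matches a tag, so this always yields the short hash
git log -1 --format=%ad --date=local                 # GIT_DATE: date of the last commit
git log -1 --format=%s                               # GIT_COMMIT_SUBJECT: subject line of the last commit
```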
examples/CMakeLists.txt (deleted)

@@ -1,56 +0,0 @@
-# dependencies
-
-find_package(Threads REQUIRED)
-
-# third-party
-
-# ...
-
-# examples
-
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-
-if (EMSCRIPTEN)
-else()
-    add_subdirectory(cvector-generator)
-    add_subdirectory(baby-llama)
-    add_subdirectory(batched-bench)
-    add_subdirectory(batched)
-    add_subdirectory(benchmark)
-    add_subdirectory(convert-llama2c-to-ggml)
-    add_subdirectory(embedding)
-    add_subdirectory(eval-callback)
-    add_subdirectory(export-lora)
-    add_subdirectory(finetune)
-    add_subdirectory(gbnf-validator)
-    add_subdirectory(gguf-split)
-    add_subdirectory(gguf)
-    add_subdirectory(gritlm)
-    add_subdirectory(imatrix)
-    add_subdirectory(infill)
-    add_subdirectory(llama-bench)
-    add_subdirectory(llava)
-    add_subdirectory(lookahead)
-    add_subdirectory(lookup)
-    add_subdirectory(main)
-    add_subdirectory(parallel)
-    add_subdirectory(passkey)
-    add_subdirectory(perplexity)
-    add_subdirectory(quantize-stats)
-    add_subdirectory(quantize)
-    add_subdirectory(retrieval)
-    if (LLAMA_RPC)
-        add_subdirectory(rpc)
-    endif()
-    if (LLAMA_BUILD_SERVER)
-        add_subdirectory(server)
-    endif()
-    if (LLAMA_SYCL)
-        add_subdirectory(sycl)
-    endif()
-    add_subdirectory(save-load-state)
-    add_subdirectory(simple)
-    add_subdirectory(speculative)
-    add_subdirectory(tokenize)
-    add_subdirectory(train-text-from-scratch)
-endif()
examples/baby-llama/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-baby-llama)
-add_executable(${TARGET} baby-llama.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/batched-bench/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-batched-bench)
-add_executable(${TARGET} batched-bench.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/batched/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-batched)
-add_executable(${TARGET} batched.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/benchmark/CMakeLists.txt (deleted)

@@ -1,6 +0,0 @@
-set(TARGET llama-bench-matmult)
-add_executable(${TARGET} benchmark-matmult.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama build_info ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/convert-llama2c-to-ggml/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-convert-llama2c-to-ggml)
-add_executable(${TARGET} convert-llama2c-to-ggml.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/cvector-generator/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-cvector-generator)
-add_executable(${TARGET} cvector-generator.cpp pca.hpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/embedding/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-embedding)
-add_executable(${TARGET} embedding.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/eval-callback/CMakeLists.txt (deleted)

@@ -1,9 +0,0 @@
-set(TARGET llama-eval-callback)
-add_executable(${TARGET} eval-callback.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-set(TEST_TARGET test-eval-callback)
-add_test(NAME ${TEST_TARGET} COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0)
-set_property(TEST ${TEST_TARGET} PROPERTY LABELS eval-callback curl)
examples/export-lora/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-export-lora)
-add_executable(${TARGET} export-lora.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/finetune/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-finetune)
-add_executable(${TARGET} finetune.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/gbnf-validator/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-gbnf-validator)
-add_executable(${TARGET} gbnf-validator.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/gguf-split/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-gguf-split)
-add_executable(${TARGET} gguf-split.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/gguf/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-gguf)
-add_executable(${TARGET} gguf.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/gritlm/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-gritlm)
-add_executable(${TARGET} gritlm.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/imatrix/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-imatrix)
-add_executable(${TARGET} imatrix.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/imatrix/README.md

@@ -25,7 +25,7 @@ For faster computation, make sure to use GPU offloading via the `-ngl` argument
 ## Example

 ```bash
-LLAMA_CUDA=1 make -j
+GGML_CUDA=1 make -j

 # generate importance matrix (imatrix.dat)
 ./llama-imatrix -m ggml-model-f16.gguf -f train-data.txt -ngl 99
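The generated `imatrix.dat` is typically fed to the quantizer afterwards; a sketch of that follow-up step, assuming upstream llama.cpp's `llama-quantize` tool and its `--imatrix` flag (neither is part of this diff):

```bash
# Quantize the f16 model using the importance matrix produced above.
./llama-quantize --imatrix imatrix.dat ggml-model-f16.gguf ggml-model-Q4_K_M.gguf Q4_K_M
```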
examples/infill/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-infill)
-add_executable(${TARGET} infill.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/llama-bench/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-bench)
-add_executable(${TARGET} llama-bench.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/llama.android CMakeLists.txt (deleted; variant building llama.cpp via add_subdirectory)

@@ -1,55 +0,0 @@
-
-# For more information about using CMake with Android Studio, read the
-# documentation: https://d.android.com/studio/projects/add-native-code.html.
-# For more examples on how to use CMake, see https://github.com/android/ndk-samples.
-
-# Sets the minimum CMake version required for this project.
-cmake_minimum_required(VERSION 3.22.1)
-
-# Declares the project name. The project name can be accessed via ${ PROJECT_NAME},
-# Since this is the top level CMakeLists.txt, the project name is also accessible
-# with ${CMAKE_PROJECT_NAME} (both CMake variables are in-sync within the top level
-# build script scope).
-project("llama-android")
-
-## Fetch latest llama.cpp from GitHub
-#include(FetchContent)
-#FetchContent_Declare(
-# llama
-# GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
-# GIT_TAG master
-#)
-#
-## Also provides "common"
-#FetchContent_MakeAvailable(llama)
-
-# llama.cpp CI uses the code from the current branch
-# ref: https://github.com/ggerganov/llama.cpp/pull/7341#issuecomment-2117617700
-add_subdirectory(../../../../../../ build-llama)
-
-# Creates and names a library, sets it as either STATIC
-# or SHARED, and provides the relative paths to its source code.
-# You can define multiple libraries, and CMake builds them for you.
-# Gradle automatically packages shared libraries with your APK.
-#
-# In this top level CMakeLists.txt, ${CMAKE_PROJECT_NAME} is used to define
-# the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
-# is preferred for the same purpose.
-#
-# In order to load a library into your app from Java/Kotlin, you must call
-# System.loadLibrary() and pass the name of the library defined here;
-# for GameActivity/NativeActivity derived applications, the same library name must be
-# used in the AndroidManifest.xml file.
-add_library(${CMAKE_PROJECT_NAME} SHARED
-    # List C/C++ source files with relative paths to this CMakeLists.txt.
-    llama-android.cpp)
-
-# Specifies libraries CMake should link to your target library. You
-# can link libraries from various origins, such as libraries defined in this
-# build script, prebuilt third-party libraries, or Android system libraries.
-target_link_libraries(${CMAKE_PROJECT_NAME}
-    # List libraries link to the target library
-    llama
-    common
-    android
-    log)
examples/llama.android CMakeLists.txt (deleted; variant fetching llama.cpp via FetchContent)

@@ -1,49 +0,0 @@
-# For more information about using CMake with Android Studio, read the
-# documentation: https://d.android.com/studio/projects/add-native-code.html.
-# For more examples on how to use CMake, see https://github.com/android/ndk-samples.
-
-# Sets the minimum CMake version required for this project.
-cmake_minimum_required(VERSION 3.22.1)
-
-# Declares the project name. The project name can be accessed via ${ PROJECT_NAME},
-# Since this is the top level CMakeLists.txt, the project name is also accessible
-# with ${CMAKE_PROJECT_NAME} (both CMake variables are in-sync within the top level
-# build script scope).
-project("llama-android")
-
-include(FetchContent)
-FetchContent_Declare(
-    llama
-    GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
-    GIT_TAG master
-)
-
-# Also provides "common"
-FetchContent_MakeAvailable(llama)
-
-# Creates and names a library, sets it as either STATIC
-# or SHARED, and provides the relative paths to its source code.
-# You can define multiple libraries, and CMake builds them for you.
-# Gradle automatically packages shared libraries with your APK.
-#
-# In this top level CMakeLists.txt, ${CMAKE_PROJECT_NAME} is used to define
-# the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
-# is preferred for the same purpose.
-#
-# In order to load a library into your app from Java/Kotlin, you must call
-# System.loadLibrary() and pass the name of the library defined here;
-# for GameActivity/NativeActivity derived applications, the same library name must be
-# used in the AndroidManifest.xml file.
-add_library(${CMAKE_PROJECT_NAME} SHARED
-    # List C/C++ source files with relative paths to this CMakeLists.txt.
-    llama-android.cpp)
-
-# Specifies libraries CMake should link to your target library. You
-# can link libraries from various origins, such as libraries defined in this
-# build script, prebuilt third-party libraries, or Android system libraries.
-target_link_libraries(${CMAKE_PROJECT_NAME}
-    # List libraries link to the target library
-    llama
-    common
-    android
-    log)
examples/llava/CMakeLists.txt (deleted)

@@ -1,38 +0,0 @@
-add_library(llava OBJECT
-            llava.cpp
-            llava.h
-            clip.cpp
-            clip.h
-            )
-
-target_link_libraries(llava PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
-
-target_include_directories(llava PUBLIC .)
-target_include_directories(llava PUBLIC ../..)
-target_include_directories(llava PUBLIC ../../common)
-
-target_compile_features(llava PRIVATE cxx_std_11)
-
-add_library(llava_static STATIC $<TARGET_OBJECTS:llava>)
-if (BUILD_SHARED_LIBS)
-    set_target_properties(llava PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_compile_definitions(llava PRIVATE LLAMA_SHARED LLAMA_BUILD)
-    add_library(llava_shared SHARED $<TARGET_OBJECTS:llava>)
-    target_link_libraries(llava_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
-    install(TARGETS llava_shared LIBRARY)
-endif()
-
-if (NOT MSVC)
-    target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h
-endif()
-
-if(TARGET BUILD_INFO)
-    add_dependencies(llava BUILD_INFO)
-endif()
-
-set(TARGET llama-llava-cli)
-add_executable(${TARGET} llava-cli.cpp)
-set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-cli)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/llava/MobileVLM-README.md

@@ -194,7 +194,7 @@ llama_print_timings: total time = 44411.01 ms / 377 tokens
 ## Orin compile and run
 ### compile
 ```sh
-make LLAMA_CUDA=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32
+make GGML_CUDA=1 CUDA_DOCKER_ARCH=sm_87 GGML_CUDA_F16=1 -j 32
 ```
 ### run on Orin
 ### case 1
examples/lookahead/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-lookahead)
-add_executable(${TARGET} lookahead.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/lookup/CMakeLists.txt (deleted)

@@ -1,23 +0,0 @@
-set(TARGET llama-lookup)
-add_executable(${TARGET} lookup.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-set(TARGET llama-lookup-create)
-add_executable(${TARGET} lookup-create.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-set(TARGET llama-lookup-merge)
-add_executable(${TARGET} lookup-merge.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-set(TARGET llama-lookup-stats)
-add_executable(${TARGET} lookup-stats.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/main-cmake-pkg/CMakeLists.txt (deleted)

@@ -1,33 +0,0 @@
-cmake_minimum_required(VERSION 3.12)
-project("llama-cli-cmake-pkg" C CXX)
-set(TARGET llama-cli-cmake-pkg)
-
-find_package(Llama 0.0.1 REQUIRED)
-
-# Bake common functionality in with target. Because applications
-# using the relocatable Llama package should be outside of the
-# source tree, llama-cli-cmake-pkg pretends the dependencies are built-in.
-set(_common_path "${CMAKE_CURRENT_LIST_DIR}/../../common")
-add_library(common OBJECT)
-file(GLOB _common_files
-    "${_common_path}/*.h"
-    "${_common_path}/*.cpp"
-)
-target_sources(common PRIVATE ${_common_files})
-
-# If the common project was part of "llama-cli-cmake-pkg" the transient
-# defines would automatically be attached. Because the common func-
-# tionality is separate, but dependent upon the defines, it must be
-# explicitly extracted from the "llama" target.
-#
-get_target_property(_llama_transient_defines llama
-    INTERFACE_COMPILE_DEFINITIONS)
-
-target_compile_definitions(common PRIVATE "${_llama_transient_defines}")
-
-add_executable(${TARGET} ${CMAKE_CURRENT_LIST_DIR}/../main/main.cpp)
-target_include_directories(${TARGET} PRIVATE ${_common_path})
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
examples/main/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-cli)
-add_executable(${TARGET} main.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/parallel/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-parallel)
-add_executable(${TARGET} parallel.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/passkey/CMakeLists.txt (deleted)

@@ -1,5 +0,0 @@
-set(TARGET llama-passkey)
-add_executable(${TARGET} passkey.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,5 +0,0 @@
-set(TARGET llama-perplexity)
-add_executable(${TARGET} perplexity.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,6 +0,0 @@
-set(TARGET llama-quantize-stats)
-add_executable(${TARGET} quantize-stats.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama build_info ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,6 +0,0 @@
-set(TARGET llama-quantize)
-add_executable(${TARGET} quantize.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,5 +0,0 @@
-set(TARGET llama-retrieval)
-add_executable(${TARGET} retrieval.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,2 +0,0 @@
-add_executable(rpc-server rpc-server.cpp)
-target_link_libraries(rpc-server PRIVATE ggml llama)
@@ -29,13 +29,13 @@ You can also run multiple `rpc-server` instances on the same host, each with a d
 
 ## Usage
 
-On each host, build the corresponding backend with `cmake` and add `-DLLAMA_RPC=ON` to the build options.
+On each host, build the corresponding backend with `cmake` and add `-DGGML_RPC=ON` to the build options.
 For example, to build the CUDA backend with RPC support:
 
 ```bash
 mkdir build-rpc-cuda
 cd build-rpc-cuda
-cmake .. -DLLAMA_CUDA=ON -DLLAMA_RPC=ON
+cmake .. -DGGML_CUDA=ON -DGGML_RPC=ON
 cmake --build . --config Release
 ```
@@ -58,12 +58,12 @@ $ CUDA_VISIBLE_DEVICES=0 bin/rpc-server -p 50052
 This way you can run multiple `rpc-server` instances on the same host, each with a different CUDA device.
 
 
-On the main host build `llama.cpp` only with `-DLLAMA_RPC=ON`:
+On the main host build `llama.cpp` only with `-DGGML_RPC=ON`:
 
 ```bash
 mkdir build-rpc
 cd build-rpc
-cmake .. -DLLAMA_RPC=ON
+cmake .. -DGGML_RPC=ON
 cmake --build . --config Release
 ```
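Putting the two builds above together, a typical RPC session would look like the following sketch; the addresses, port, and model path are illustrative stand-ins.

```bash
# On each worker host: expose the local backend over TCP (example port).
bin/rpc-server -p 50052

# On the main host: offload layers to the workers via --rpc (example addresses).
bin/llama-cli -m model.gguf -p "Hello" -ngl 99 \
    --rpc 192.168.88.10:50052,192.168.88.11:50052
```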
@@ -1,5 +0,0 @@
-set(TARGET llama-save-load-state)
-add_executable(${TARGET} save-load-state.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,51 +0,0 @@
-set(TARGET llama-server)
-option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
-set(TARGET_SRCS
-    server.cpp
-    utils.hpp
-    httplib.h
-)
-set(PUBLIC_ASSETS
-    colorthemes.css
-    style.css
-    theme-beeninorder.css
-    theme-ketivah.css
-    theme-mangotango.css
-    theme-playground.css
-    theme-polarnight.css
-    theme-snowstorm.css
-    index.html
-    index-new.html
-    index.js
-    completion.js
-    system-prompts.js
-    prompt-formats.js
-    json-schema-to-grammar.mjs
-)
-foreach(asset ${PUBLIC_ASSETS})
-    set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
-    set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
-    list(APPEND TARGET_SRCS ${output})
-    add_custom_command(
-        DEPENDS "${input}"
-        OUTPUT "${output}"
-        COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
-    )
-endforeach()
-add_executable(${TARGET} ${TARGET_SRCS})
-install(TARGETS ${TARGET} RUNTIME)
-target_compile_definitions(${TARGET} PRIVATE
-    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
-)
-target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
-if (LLAMA_SERVER_SSL)
-    find_package(OpenSSL REQUIRED)
-    target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
-    target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
-endif()
-if (WIN32)
-    TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
-endif()
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
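The deleted server CMakeLists above embedded each web asset by converting it into a generated C++ header at build time with `scripts/xxd.cmake`. As a sketch, the equivalent manual invocation is the same command the `add_custom_command` ran per asset; the paths here are illustrative.

```bash
# Convert one asset into a generated header (paths are illustrative).
cmake -DINPUT=examples/server/public/index.html \
      -DOUTPUT=index.html.hpp \
      -P scripts/xxd.cmake
```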
@@ -1,5 +0,0 @@
-set(TARGET llama-simple)
-add_executable(${TARGET} simple.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,5 +0,0 @@
-set(TARGET llama-speculative)
-add_executable(${TARGET} speculative.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,9 +0,0 @@
-# MIT license
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: MIT
-
-set(TARGET llama-ls-sycl-device)
-add_executable(${TARGET} ls-sycl-device.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)
@@ -8,10 +8,10 @@ cd build
 source /opt/intel/oneapi/setvars.sh
 
 #for FP16
-#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference
+#cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON # faster for long-prompt inference
 
 #for FP32
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 
 #build example/main
 #cmake --build . --config Release --target main
@@ -13,10 +13,10 @@ if %errorlevel% neq 0 goto ERROR
 
 :: for FP16
 :: faster for long-prompt inference
-:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+:: cmake -G "MinGW Makefiles" .. -DGGML_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON
 
 :: for FP32
-cmake -G "Ninja" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
+cmake -G "Ninja" .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
 if %errorlevel% neq 0 goto ERROR
 :: build example/main only
 :: make main
@@ -1,5 +0,0 @@
-set(TARGET llama-tokenize)
-add_executable(${TARGET} tokenize.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,5 +0,0 @@
-set(TARGET llama-train-text-from-scratch)
-add_executable(${TARGET} train-text-from-scratch.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -8,7 +8,9 @@
 
 #include "ggml.h"
 #include "ggml-backend.h"
-#include "ggml-sycl/presets.hpp"
+
+#define GGML_SYCL_NAME "SYCL"
+#define GGML_SYCL_MAX_DEVICES 48
 
 #ifdef __cplusplus
 extern "C" {
@@ -603,7 +603,7 @@ static void on_no_fattn_vec_case(const int D) {
     if (D == 64) {
         fprintf(stderr, "Unsupported KV type combination for head_size 64.\n");
         fprintf(stderr, "By default only f16 KV cache is supported.\n");
-        fprintf(stderr, "Compile with LLAMA_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
+        fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
         GGML_ASSERT(false);
     } else if (D == 128) {
         fprintf(stderr, "Unsupported KV type combination for head_size 128.\n");
@@ -611,7 +611,7 @@ static void on_no_fattn_vec_case(const int D) {
         fprintf(stderr, "  - K == q4_0, V == q4_0,  4.50 BPV\n");
         fprintf(stderr, "  - K == q8_0, V == q8_0,  8.50 BPV\n");
         fprintf(stderr, "  - K == f16,  V == f16,  16.00 BPV\n");
-        fprintf(stderr, "Compile with LLAMA_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
+        fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
         GGML_ASSERT(false);
     } else {
         fprintf(stderr, "Unsupported KV type combination for head_size 256.\n");
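The messages above list the KV-cache type combinations compiled in by default. A build-and-run sketch that exercises the extra combinations, assuming the `GGML_CUDA_FA_ALL_QUANTS` option named in this diff and the standard `-fa`/`-ctk`/`-ctv` flags; the model path is illustrative:

```bash
# Build with all flash-attention KV quant combinations (slower to compile).
cmake .. -DGGML_CUDA=ON -DGGML_CUDA_FA_ALL_QUANTS=ON
cmake --build . --config Release

# Run with a quantized KV cache; -ctk/-ctv set the K/V cache types,
# -fa enables flash attention.
bin/llama-cli -m model.gguf -fa -ctk q8_0 -ctv q8_0 -ngl 99
```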
Some files were not shown because too many files have changed in this diff.