Commit 9c10486204 — koboldcpp (mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-11 17:44:38 +00:00)

merge the file structure refactor, testing

315 changed files with 124 additions and 568 deletions
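This commit tracks upstream llama.cpp's source-tree reorganization: the ggml sources move from the repository root into ggml/src and ggml/include, llama.cpp and the unicode files move under src/ and include/, and the LLAMA_-prefixed backend build flags are renamed to GGML_. As a rough sketch of what the rename means for a build invocation (flags taken from the README hunks further down; everything else about the build is unchanged):

```bash
# before the refactor
make LLAMA_CUDA=1 -j
# after the refactor: backend flags use the GGML_ prefix,
# and the sources now live under ggml/src and ggml/include
make GGML_CUDA=1 -j
```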
CMakeLists.txt

@@ -49,7 +49,7 @@ option(LLAMA_CUDA_F16 "llama: use 16 bit floats for dmmv
 set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
 set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
     "llama: max. batch size for using peer access")
-set(GGML_CUDA_USE_GRAPHS ON)
+set(GGML_CUDA_USE_GRAPHS OFF)
 option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)

 # Other

@@ -69,16 +69,20 @@ find_package(Threads REQUIRED)
 add_compile_definitions(LOG_DISABLE_LOGS)
 add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})

-file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu")
-list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu")
-file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
+file(GLOB GGML_SOURCES_CUDA "ggml/src/ggml-cuda/*.cu")
+list(APPEND GGML_SOURCES_CUDA "ggml/src/ggml-cuda.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu")
 list(APPEND GGML_SOURCES_CUDA ${SRCS})
-file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
+file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
 list(APPEND GGML_SOURCES_CUDA ${SRCS})
 set(GGML_V3_CUDA_SOURCES otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h)
 set(GGML_V2_CUDA_SOURCES otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h)
 set(GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)
+
+if (GGML_CUDA_USE_GRAPHS)
+    add_compile_definitions(GGML_CUDA_USE_GRAPHS)
+endif()

 if (LLAMA_CUBLAS)
     cmake_minimum_required(VERSION 3.17)

@@ -102,11 +106,11 @@ if (LLAMA_CUBLAS)
     add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE})

     # only build minimal quants required for fattn quant kv
-    file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
+    file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
     list(APPEND GGML_SOURCES_CUDA ${SRCS})
-    file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
+    file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
     list(APPEND GGML_SOURCES_CUDA ${SRCS})
-    file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
+    file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
     list(APPEND GGML_SOURCES_CUDA ${SRCS})

     if (LLAMA_STATIC)

@@ -167,11 +171,11 @@ if (LLAMA_HIPBLAS)
     if (${hipblas_FOUND} AND ${hip_FOUND})
         message(STATUS "HIP and hipBLAS found")
-        file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu")
-        list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")
-        file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
+        file(GLOB GGML_SOURCES_ROCM "ggml/src/ggml-cuda/*.cu")
+        list(APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda.cu")
+        file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu")
         list(APPEND GGML_SOURCES_ROCM ${SRCS})
-        file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
+        file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu")
         list(APPEND GGML_SOURCES_ROCM ${SRCS})
         add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA SD_USE_CUBLAS)
         add_library(ggml-rocm ${GGML_SOURCES_CUDA})

@@ -179,11 +183,11 @@ if (LLAMA_HIPBLAS)
             target_compile_definitions(ggml-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
         endif()

-        file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
+        file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
         list(APPEND GGML_SOURCES_ROCM ${SRCS})
-        file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
+        file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
         list(APPEND GGML_SOURCES_ROCM ${SRCS})
-        file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
+        file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
         list(APPEND GGML_SOURCES_ROCM ${SRCS})

         # only build minimal quants required for fattn quant kv

@@ -418,18 +422,18 @@ endif()
 #

 add_library(ggml
-            ggml.c
-            ggml.h
-            ggml-alloc.c
-            ggml-alloc.h
-            ggml-backend.c
-            ggml-backend.h
-            ggml-quants.c
-            ggml-quants.h
-            sgemm.cpp
-            sgemm.h
+            ggml/src/ggml.c
+            ggml/include/ggml.h
+            ggml/src/ggml-alloc.c
+            ggml/include/ggml-alloc.h
+            ggml/src/ggml-backend.c
+            ggml/include/ggml-backend.h
+            ggml/src/ggml-quants.c
+            ggml/src/ggml-quants.h
+            ggml/src/sgemm.cpp
+            ggml/src/sgemm.h
             ${GGML_SOURCES_CUDA})
-target_include_directories(ggml PUBLIC . ./otherarch ./otherarch/tools)
+target_include_directories(ggml PUBLIC . ./include ./otherarch ./otherarch/tools)
 target_compile_features(ggml PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)

@@ -437,7 +441,7 @@ set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
 add_library(ggml_v1
             otherarch/ggml_v1.c
             otherarch/ggml_v1.h)
-target_include_directories(ggml_v1 PUBLIC . ./otherarch ./otherarch/tools)
+target_include_directories(ggml_v1 PUBLIC . ./include ./otherarch ./otherarch/tools)
 target_compile_features(ggml_v1 PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml_v1 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml_v1 PROPERTIES POSITION_INDEPENDENT_CODE ON)

@@ -447,7 +451,7 @@ add_library(ggml_v2
             otherarch/ggml_v2.h
             ${GGML_V2_CUDA_SOURCES}
             ${GGML_V2_LEGACY_CUDA_SOURCES})
-target_include_directories(ggml_v2 PUBLIC . ./otherarch ./otherarch/tools)
+target_include_directories(ggml_v2 PUBLIC . ./include ./otherarch ./otherarch/tools)
 target_compile_features(ggml_v2 PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml_v2 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml_v2 PROPERTIES POSITION_INDEPENDENT_CODE ON)

@@ -456,7 +460,7 @@ add_library(ggml_v3
             otherarch/ggml_v3.c
             otherarch/ggml_v3.h
             ${GGML_V3_CUDA_SOURCES})
-target_include_directories(ggml_v3 PUBLIC . ./otherarch ./otherarch/tools)
+target_include_directories(ggml_v3 PUBLIC . ./include ./otherarch ./otherarch/tools)
 target_compile_features(ggml_v3 PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml_v3 PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml_v3 PROPERTIES POSITION_INDEPENDENT_CODE ON)

@@ -472,31 +476,31 @@ add_library(common2
             examples/llava/llava.h
             examples/llava/clip.cpp
             examples/llava/clip.h
-            unicode.h
-            unicode.cpp
-            unicode-data.cpp)
-target_include_directories(common2 PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+            src/unicode.h
+            src/unicode.cpp
+            src/unicode-data.cpp)
+target_include_directories(common2 PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(common2 PUBLIC cxx_std_11) # don't bump
 target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(common2 PROPERTIES POSITION_INDEPENDENT_CODE ON)

 add_library(sdtype_adapter
             otherarch/sdcpp/sdtype_adapter.cpp)
-target_include_directories(sdtype_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(sdtype_adapter PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(sdtype_adapter PUBLIC cxx_std_11) # don't bump
 target_link_libraries(sdtype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(sdtype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

 add_library(whisper_adapter
             otherarch/whispercpp/whisper_adapter.cpp)
-target_include_directories(whisper_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
+target_include_directories(whisper_adapter PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/whispercpp ./examples ./common)
 target_compile_features(whisper_adapter PUBLIC cxx_std_11) # don't bump
 target_link_libraries(whisper_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(whisper_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

 add_library(gpttype_adapter
             gpttype_adapter.cpp)
-target_include_directories(gpttype_adapter PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+target_include_directories(gpttype_adapter PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
 target_compile_features(gpttype_adapter PUBLIC cxx_std_11) # don't bump
 target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)

@@ -504,7 +508,7 @@ set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 if (LLAMA_CUBLAS)
     set(TARGET koboldcpp_cublas)
     add_library(${TARGET} SHARED expose.cpp expose.h)
-    target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+    target_include_directories(${TARGET} PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
     target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
     set_target_properties(${TARGET} PROPERTIES PREFIX "")
     set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")

@@ -516,7 +520,7 @@ endif()
 if (LLAMA_HIPBLAS)
     set(TARGET koboldcpp_hipblas)
     add_library(${TARGET} SHARED expose.cpp expose.h)
-    target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
+    target_include_directories(${TARGET} PUBLIC . ./include ./otherarch ./otherarch/tools ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./examples ./common)
     target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
     set_target_properties(${TARGET} PROPERTIES PREFIX "")
     set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_hipblas")
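For reference, a minimal configure-and-build sequence against the updated CMakeLists.txt; note that koboldcpp's own switches such as LLAMA_CUBLAS keep their old names in this file even though the relocated ggml code now uses GGML_-prefixed defines (illustrative sketch, not part of the commit):

```bash
# sketch: building the koboldcpp_cublas library with the refactored tree
mkdir build && cd build
cmake .. -DLLAMA_CUBLAS=ON   # koboldcpp option name, unchanged by this merge
cmake --build . --config Release
```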
Makefile (82 changes)

@@ -42,8 +42,8 @@ endif
 #

 # keep standard at C11 and C++11
-CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE
-CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE
+CFLAGS = -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE
+CXXFLAGS = -I. -Iggml/include -Iggml/src -Iinclude -Isrc -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_LLAMAFILE
 LDFLAGS =
 FASTCFLAGS = $(subst -O3,-Ofast,$(CFLAGS))
 FASTCXXFLAGS = $(subst -O3,-Ofast,$(CXXFLAGS))

@@ -150,17 +150,17 @@ ifndef LLAMA_NO_ACCELERATE
 endif

 # it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
-OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu))
-OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/mmq*.cu))
-OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
-OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
-OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
+OBJS_CUDA_TEMP_INST = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
+OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
+OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
+OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
+OBJS_CUDA_TEMP_INST += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))

 ifdef LLAMA_CUBLAS
 	CUBLAS_FLAGS = -DGGML_USE_CUDA -DSD_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
 	CUBLASLD_FLAGS = -lcuda -lcublas -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/local/cuda/targets/sbsa-linux/lib -L/usr/lib/wsl/lib
 	CUBLAS_OBJS = ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
-	CUBLAS_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
+	CUBLAS_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
 	CUBLAS_OBJS += $(OBJS_CUDA_TEMP_INST)
 	NVCC = nvcc
 	NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math

@@ -214,9 +214,9 @@ ifdef LLAMA_CUDA_CCBIN
 	NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
 endif

-ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
+ggml-cuda/%.o: ggml/src/ggml-cuda/%.cu ggml/include/ggml.h ggml/src/ggml-common.h ggml/src/ggml-cuda/common.cuh
 	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
-ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
+ggml-cuda.o: ggml/src/ggml-cuda.cu ggml/include/ggml-cuda.h ggml/include/ggml.h ggml/include/ggml-backend.h ggml/src/ggml-backend-impl.h ggml/src/ggml-common.h $(wildcard ggml/src/ggml-cuda/*.cuh)
 	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
 	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@

@@ -244,7 +244,7 @@ ifdef LLAMA_HIPBLAS
 	HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA -DSD_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
 	HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
 	HIP_OBJS += ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
-	HIP_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu))
+	HIP_OBJS += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
 	HIP_OBJS += $(OBJS_CUDA_TEMP_INST)

 	HIPFLAGS2 += $(addprefix --offload-arch=,$(GPU_TARGETS))

@@ -252,9 +252,9 @@ ifdef LLAMA_HIPBLAS
 	HIPFLAGS2 += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
 	HIPFLAGS2 += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)

-ggml-cuda/%.o: ggml-cuda/%.cu ggml.h ggml-common.h ggml-cuda/common.cuh
+ggml-cuda/%.o: ggml/src/ggml-cuda/%.cu ggml/include/ggml.h ggml/src/ggml-common.h ggml/src/ggml-cuda/common.cuh
 	$(HCXX) $(CXXFLAGS) $(HIPFLAGS) $(HIPFLAGS2) -x hip -c -o $@ $<
-ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
+ggml-cuda.o: ggml/src/ggml-cuda.cu ggml/include/ggml-cuda.h ggml/include/ggml.h ggml/include/ggml-backend.h ggml/src/ggml-backend-impl.h ggml/src/ggml-common.h $(wildcard ggml/src/ggml-cuda/*.cuh)
 	$(HCXX) $(CXXFLAGS) $(HIPFLAGS) $(HIPFLAGS2) -x hip -c -o $@ $<
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
 	$(HCXX) $(CXXFLAGS) $(HIPFLAGS) $(HIPFLAGS2) -x hip -c -o $@ $<

@@ -273,7 +273,7 @@ ifdef LLAMA_METAL
 ggml-metal.o: ggml-metal.m ggml-metal.h
 	@echo "== Preparing merged Metal file =="
-	@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-merged.metal
+	@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-merged.metal
 	$(CC) $(CFLAGS) -c $< -o $@
 endif # LLAMA_METAL

@@ -392,57 +392,57 @@ $(info )
 # Build library
 #

-ggml.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml_v4_openblas.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_openblas.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
-ggml_v4_failsafe.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_failsafe.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(NONECFLAGS) -c $< -o $@
-ggml_v4_noavx2.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) -c $< -o $@
-ggml_v4_clblast.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_clblast.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
-ggml_v4_cublas.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
-ggml_v4_clblast_noavx2.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
-ggml_v4_vulkan.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@
-ggml_v4_vulkan_noavx2.o: ggml.c ggml.h ggml-cuda.h ggml-common.h
+ggml_v4_vulkan_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h
 	$(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(VULKAN_FLAGS) -c $< -o $@

 #quants
-ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-cuda.h ggml-common.h
+ggml-quants.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
 	$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
-ggml-quants_noavx2.o: ggml-quants.c ggml.h ggml-quants.h ggml-cuda.h ggml-common.h
+ggml-quants_noavx2.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
 	$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
-ggml-quants_failsafe.o: ggml-quants.c ggml.h ggml-quants.h ggml-cuda.h ggml-common.h
+ggml-quants_failsafe.o: ggml/src/ggml-quants.c ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
 	$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@

 #sgemm
-sgemm.o: sgemm.cpp sgemm.h ggml.h
+sgemm.o: ggml/src/sgemm.cpp ggml/src/sgemm.h ggml/include/ggml.h
 	$(CXX) $(CXXFLAGS) $(FULLCFLAGS) -c $< -o $@
-sgemm_noavx2.o: sgemm.cpp sgemm.h ggml.h
+sgemm_noavx2.o: ggml/src/sgemm.cpp ggml/src/sgemm.h ggml/include/ggml.h
 	$(CXX) $(CXXFLAGS) $(SIMPLECFLAGS) -c $< -o $@
-sgemm_failsafe.o: sgemm.cpp sgemm.h ggml.h
+sgemm_failsafe.o: ggml/src/sgemm.cpp ggml/src/sgemm.h ggml/include/ggml.h
 	$(CXX) $(CXXFLAGS) $(NONECFLAGS) -c $< -o $@

 #there's no intrinsics or special gpu ops used here, so we can have a universal object
-ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
+ggml-alloc.o: ggml/src/ggml-alloc.c ggml/include/ggml.h ggml/include/ggml-alloc.h
 	$(CC) $(CFLAGS) -c $< -o $@
 llava.o: examples/llava/llava.cpp examples/llava/llava.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-unicode.o: unicode.cpp unicode.h
+unicode.o: src/unicode.cpp src/unicode.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
-unicode-data.o: unicode-data.cpp unicode-data.h
+unicode-data.o: src/unicode-data.cpp src/unicode-data.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 #these have special gpu defines
-ggml-backend_default.o: ggml-backend.c ggml.h ggml-backend.h
+ggml-backend_default.o: ggml/src/ggml-backend.c ggml/include/ggml.h ggml/include/ggml-backend.h
 	$(CC) $(CFLAGS) -c $< -o $@
-ggml-backend_vulkan.o: ggml-backend.c ggml.h ggml-backend.h
+ggml-backend_vulkan.o: ggml/src/ggml-backend.c ggml/include/ggml.h ggml/include/ggml-backend.h
 	$(CC) $(CFLAGS) $(VULKAN_FLAGS) -c $< -o $@
-ggml-backend_cublas.o: ggml-backend.c ggml.h ggml-backend.h
+ggml-backend_cublas.o: ggml/src/ggml-backend.c ggml/include/ggml.h ggml/include/ggml-backend.h
 	$(CC) $(CFLAGS) $(CUBLAS_FLAGS) -c $< -o $@
 llavaclip_default.o: examples/llava/clip.cpp examples/llava/clip.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

@@ -450,7 +450,7 @@ llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@

 #this is only used for openblas and accelerate
-ggml-blas.o: ggml-blas.cpp ggml-blas.h
+ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 #version 3 libs

@@ -502,11 +502,11 @@ ggml_v3-opencl.o: otherarch/ggml_v3-opencl.cpp otherarch/ggml_v3-opencl.h
 	$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@

 #vulkan
-ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h
+ggml-vulkan.o: ggml/src/ggml-vulkan.cpp ggml/include/ggml-vulkan.h
 	$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@

 # intermediate objects
-llama.o: llama.cpp ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h otherarch/llama-util.h
+llama.o: src/llama.cpp ggml/include/ggml.h ggml/include/ggml-alloc.h ggml/include/ggml-backend.h ggml/include/ggml-cuda.h ggml/include/ggml-metal.h include/llama.h otherarch/llama-util.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 common.o: common/common.cpp common/common.h common/log.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

@@ -532,7 +532,7 @@ whispercpp_cublas.o: otherarch/whispercpp/whisper_adapter.cpp
 	$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@

 # idiotic "for easier compilation"
-GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp llama.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml.h ggml-cuda.h llama.h otherarch/llama-util.h
+GPTTYPE_ADAPTER = gpttype_adapter.cpp otherarch/llama_v2.cpp otherarch/llama_v3.cpp src/llama.cpp otherarch/utils.cpp otherarch/gptj_v1.cpp otherarch/gptj_v2.cpp otherarch/gptj_v3.cpp otherarch/gpt2_v1.cpp otherarch/gpt2_v2.cpp otherarch/gpt2_v3.cpp otherarch/rwkv_v2.cpp otherarch/rwkv_v3.cpp otherarch/neox_v2.cpp otherarch/neox_v3.cpp otherarch/mpt_v3.cpp ggml/include/ggml.h ggml/include/ggml-cuda.h include/llama.h otherarch/llama-util.h
 gpttype_adapter_failsafe.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) -c $< -o $@
 gpttype_adapter.o: $(GPTTYPE_ADAPTER)

@@ -552,8 +552,8 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
 clean:
 	rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
-	rm -vrf ggml-cuda/*.o
-	rm -vrf ggml-cuda/template-instances/*.o
+	rm -vrf ggml/src/ggml-cuda/*.o
+	rm -vrf ggml/src/ggml-cuda/template-instances/*.o

 # useful tools
 main: examples/main/main.cpp build-info.h ggml.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o $(OBJS_FULL) $(OBJS)
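The OBJS_CUDA_TEMP_INST assignments above pair $(wildcard) with $(patsubst %.cu,%.o,...), so every template-instance .cu file under the new ggml/src path maps to an .o object beside it; the clean target mirrors the same paths. A shell approximation of that expansion, purely for illustration:

```bash
# roughly what $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu)) yields
for f in ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu; do
    echo "${f%.cu}.o"   # strip the .cu suffix, append .o
done
```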
cmake/git-vars.cmake (new file, 22 lines)

@@ -0,0 +1,22 @@
+find_package(Git)
+
+# the commit's SHA1
+execute_process(COMMAND
+  "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
+  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  OUTPUT_VARIABLE GIT_SHA1
+  ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# the date of the commit
+execute_process(COMMAND
+  "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
+  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  OUTPUT_VARIABLE GIT_DATE
+  ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# the subject of the commit
+execute_process(COMMAND
+  "${GIT_EXECUTABLE}" log -1 --format=%s
+  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
+  ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
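The three execute_process calls in the new file wrap plain git commands; run from a checkout they produce the values that end up in GIT_SHA1, GIT_DATE and GIT_COMMIT_SUBJECT (shell equivalents shown for illustration):

```bash
git describe --match=NeVeRmAtCh --always --abbrev=8   # GIT_SHA1: the impossible --match forces a bare abbreviated hash
git log -1 --format=%ad --date=local                  # GIT_DATE: author date of the latest commit
git log -1 --format=%s                                # GIT_COMMIT_SUBJECT: subject line of the latest commit
```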
@@ -1,56 +0,0 @@
-# dependencies
-
-find_package(Threads REQUIRED)
-
-# third-party
-
-# ...
-
-# examples
-
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-
-if (EMSCRIPTEN)
-else()
-    add_subdirectory(cvector-generator)
-    add_subdirectory(baby-llama)
-    add_subdirectory(batched-bench)
-    add_subdirectory(batched)
-    add_subdirectory(benchmark)
-    add_subdirectory(convert-llama2c-to-ggml)
-    add_subdirectory(embedding)
-    add_subdirectory(eval-callback)
-    add_subdirectory(export-lora)
-    add_subdirectory(finetune)
-    add_subdirectory(gbnf-validator)
-    add_subdirectory(gguf-split)
-    add_subdirectory(gguf)
-    add_subdirectory(gritlm)
-    add_subdirectory(imatrix)
-    add_subdirectory(infill)
-    add_subdirectory(llama-bench)
-    add_subdirectory(llava)
-    add_subdirectory(lookahead)
-    add_subdirectory(lookup)
-    add_subdirectory(main)
-    add_subdirectory(parallel)
-    add_subdirectory(passkey)
-    add_subdirectory(perplexity)
-    add_subdirectory(quantize-stats)
-    add_subdirectory(quantize)
-    add_subdirectory(retrieval)
-    if (LLAMA_RPC)
-        add_subdirectory(rpc)
-    endif()
-    if (LLAMA_BUILD_SERVER)
-        add_subdirectory(server)
-    endif()
-    if (LLAMA_SYCL)
-        add_subdirectory(sycl)
-    endif()
-    add_subdirectory(save-load-state)
-    add_subdirectory(simple)
-    add_subdirectory(speculative)
-    add_subdirectory(tokenize)
-    add_subdirectory(train-text-from-scratch)
-endif()
@@ -1,5 +0,0 @@
-set(TARGET llama-baby-llama)
-add_executable(${TARGET} baby-llama.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-batched-bench)
-add_executable(${TARGET} batched-bench.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-batched)
-add_executable(${TARGET} batched.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,6 +0,0 @@
-set(TARGET llama-bench-matmult)
-add_executable(${TARGET} benchmark-matmult.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama build_info ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-convert-llama2c-to-ggml)
-add_executable(${TARGET} convert-llama2c-to-ggml.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-cvector-generator)
-add_executable(${TARGET} cvector-generator.cpp pca.hpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-embedding)
-add_executable(${TARGET} embedding.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,9 +0,0 @@
-set(TARGET llama-eval-callback)
-add_executable(${TARGET} eval-callback.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-set(TEST_TARGET test-eval-callback)
-add_test(NAME ${TEST_TARGET} COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0)
-set_property(TEST ${TEST_TARGET} PROPERTY LABELS eval-callback curl)

@@ -1,5 +0,0 @@
-set(TARGET llama-export-lora)
-add_executable(${TARGET} export-lora.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-finetune)
-add_executable(${TARGET} finetune.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-gbnf-validator)
-add_executable(${TARGET} gbnf-validator.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-gguf-split)
-add_executable(${TARGET} gguf-split.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-gguf)
-add_executable(${TARGET} gguf.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-gritlm)
-add_executable(${TARGET} gritlm.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-imatrix)
-add_executable(${TARGET} imatrix.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -25,7 +25,7 @@ For faster computation, make sure to use GPU offloading via the `-ngl` argument
 ## Example

 ```bash
-LLAMA_CUDA=1 make -j
+GGML_CUDA=1 make -j

 # generate importance matrix (imatrix.dat)
 ./llama-imatrix -m ggml-model-f16.gguf -f train-data.txt -ngl 99
 ```
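The generated imatrix.dat is then typically fed to the quantizer; a hedged follow-up example, assuming the llama-imatrix/llama-quantize binaries from the same build and placeholder file names:

```bash
# use the importance matrix when quantizing (model file names assumed)
./llama-quantize --imatrix imatrix.dat ggml-model-f16.gguf ggml-model-q4_k_m.gguf q4_k_m
```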
@@ -1,5 +0,0 @@
-set(TARGET llama-infill)
-add_executable(${TARGET} infill.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-bench)
-add_executable(${TARGET} llama-bench.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,55 +0,0 @@
-
-# For more information about using CMake with Android Studio, read the
-# documentation: https://d.android.com/studio/projects/add-native-code.html.
-# For more examples on how to use CMake, see https://github.com/android/ndk-samples.
-
-# Sets the minimum CMake version required for this project.
-cmake_minimum_required(VERSION 3.22.1)
-
-# Declares the project name. The project name can be accessed via ${ PROJECT_NAME},
-# Since this is the top level CMakeLists.txt, the project name is also accessible
-# with ${CMAKE_PROJECT_NAME} (both CMake variables are in-sync within the top level
-# build script scope).
-project("llama-android")
-
-## Fetch latest llama.cpp from GitHub
-#include(FetchContent)
-#FetchContent_Declare(
-#        llama
-#        GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
-#        GIT_TAG        master
-#)
-#
-## Also provides "common"
-#FetchContent_MakeAvailable(llama)
-
-# llama.cpp CI uses the code from the current branch
-# ref: https://github.com/ggerganov/llama.cpp/pull/7341#issuecomment-2117617700
-add_subdirectory(../../../../../../ build-llama)
-
-# Creates and names a library, sets it as either STATIC
-# or SHARED, and provides the relative paths to its source code.
-# You can define multiple libraries, and CMake builds them for you.
-# Gradle automatically packages shared libraries with your APK.
-#
-# In this top level CMakeLists.txt, ${CMAKE_PROJECT_NAME} is used to define
-# the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
-# is preferred for the same purpose.
-#
-# In order to load a library into your app from Java/Kotlin, you must call
-# System.loadLibrary() and pass the name of the library defined here;
-# for GameActivity/NativeActivity derived applications, the same library name must be
-# used in the AndroidManifest.xml file.
-add_library(${CMAKE_PROJECT_NAME} SHARED
-        # List C/C++ source files with relative paths to this CMakeLists.txt.
-        llama-android.cpp)
-
-# Specifies libraries CMake should link to your target library. You
-# can link libraries from various origins, such as libraries defined in this
-# build script, prebuilt third-party libraries, or Android system libraries.
-target_link_libraries(${CMAKE_PROJECT_NAME}
-        # List libraries link to the target library
-        llama
-        common
-        android
-        log)

@@ -1,49 +0,0 @@
-# For more information about using CMake with Android Studio, read the
-# documentation: https://d.android.com/studio/projects/add-native-code.html.
-# For more examples on how to use CMake, see https://github.com/android/ndk-samples.
-
-# Sets the minimum CMake version required for this project.
-cmake_minimum_required(VERSION 3.22.1)
-
-# Declares the project name. The project name can be accessed via ${ PROJECT_NAME},
-# Since this is the top level CMakeLists.txt, the project name is also accessible
-# with ${CMAKE_PROJECT_NAME} (both CMake variables are in-sync within the top level
-# build script scope).
-project("llama-android")
-
-include(FetchContent)
-FetchContent_Declare(
-        llama
-        GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
-        GIT_TAG        master
-)
-
-# Also provides "common"
-FetchContent_MakeAvailable(llama)
-
-# Creates and names a library, sets it as either STATIC
-# or SHARED, and provides the relative paths to its source code.
-# You can define multiple libraries, and CMake builds them for you.
-# Gradle automatically packages shared libraries with your APK.
-#
-# In this top level CMakeLists.txt, ${CMAKE_PROJECT_NAME} is used to define
-# the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
-# is preferred for the same purpose.
-#
-# In order to load a library into your app from Java/Kotlin, you must call
-# System.loadLibrary() and pass the name of the library defined here;
-# for GameActivity/NativeActivity derived applications, the same library name must be
-# used in the AndroidManifest.xml file.
-add_library(${CMAKE_PROJECT_NAME} SHARED
-        # List C/C++ source files with relative paths to this CMakeLists.txt.
-        llama-android.cpp)
-
-# Specifies libraries CMake should link to your target library. You
-# can link libraries from various origins, such as libraries defined in this
-# build script, prebuilt third-party libraries, or Android system libraries.
-target_link_libraries(${CMAKE_PROJECT_NAME}
-        # List libraries link to the target library
-        llama
-        common
-        android
-        log)

@@ -1,38 +0,0 @@
-add_library(llava OBJECT
-            llava.cpp
-            llava.h
-            clip.cpp
-            clip.h
-            )
-
-target_link_libraries(llava PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
-
-target_include_directories(llava PUBLIC .)
-target_include_directories(llava PUBLIC ../..)
-target_include_directories(llava PUBLIC ../../common)
-
-target_compile_features(llava PRIVATE cxx_std_11)
-
-add_library(llava_static STATIC $<TARGET_OBJECTS:llava>)
-if (BUILD_SHARED_LIBS)
-    set_target_properties(llava PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_compile_definitions(llava PRIVATE LLAMA_SHARED LLAMA_BUILD)
-    add_library(llava_shared SHARED $<TARGET_OBJECTS:llava>)
-    target_link_libraries(llava_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
-    install(TARGETS llava_shared LIBRARY)
-endif()
-
-if (NOT MSVC)
-    target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h
-endif()
-
-if(TARGET BUILD_INFO)
-    add_dependencies(llava BUILD_INFO)
-endif()
-
-set(TARGET llama-llava-cli)
-add_executable(${TARGET} llava-cli.cpp)
-set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-cli)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -194,7 +194,7 @@ llama_print_timings: total time = 44411.01 ms / 377 tokens
 ## Orin compile and run
 ### compile
 ```sh
-make LLAMA_CUDA=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32
+make GGML_CUDA=1 CUDA_DOCKER_ARCH=sm_87 GGML_CUDA_F16=1 -j 32
 ```
 ### run on Orin
 ### case 1
@@ -1,5 +0,0 @@
-set(TARGET llama-lookahead)
-add_executable(${TARGET} lookahead.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,23 +0,0 @@
-set(TARGET llama-lookup)
-add_executable(${TARGET} lookup.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-set(TARGET llama-lookup-create)
-add_executable(${TARGET} lookup-create.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-set(TARGET llama-lookup-merge)
-add_executable(${TARGET} lookup-merge.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
-set(TARGET llama-lookup-stats)
-add_executable(${TARGET} lookup-stats.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,33 +0,0 @@
-cmake_minimum_required(VERSION 3.12)
-project("llama-cli-cmake-pkg" C CXX)
-set(TARGET llama-cli-cmake-pkg)
-
-find_package(Llama 0.0.1 REQUIRED)
-
-# Bake common functionality in with target. Because applications
-# using the relocatable Llama package should be outside of the
-# source tree, llama-cli-cmake-pkg pretends the dependencies are built-in.
-set(_common_path "${CMAKE_CURRENT_LIST_DIR}/../../common")
-add_library(common OBJECT)
-file(GLOB _common_files
-    "${_common_path}/*.h"
-    "${_common_path}/*.cpp"
-)
-target_sources(common PRIVATE ${_common_files})
-
-# If the common project was part of "llama-cli-cmake-pkg" the transient
-# defines would automatically be attached. Because the common func-
-# tionality is separate, but dependent upon the defines, it must be
-# explicitly extracted from the "llama" target.
-#
-get_target_property(_llama_transient_defines llama
-    INTERFACE_COMPILE_DEFINITIONS)
-
-target_compile_definitions(common PRIVATE "${_llama_transient_defines}")
-
-add_executable(${TARGET} ${CMAKE_CURRENT_LIST_DIR}/../main/main.cpp)
-target_include_directories(${TARGET} PRIVATE ${_common_path})
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-cli)
-add_executable(${TARGET} main.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-parallel)
-add_executable(${TARGET} parallel.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-passkey)
-add_executable(${TARGET} passkey.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-perplexity)
-add_executable(${TARGET} perplexity.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,6 +0,0 @@
-set(TARGET llama-quantize-stats)
-add_executable(${TARGET} quantize-stats.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama build_info ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,6 +0,0 @@
-set(TARGET llama-quantize)
-add_executable(${TARGET} quantize.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-retrieval)
-add_executable(${TARGET} retrieval.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,2 +0,0 @@
-add_executable(rpc-server rpc-server.cpp)
-target_link_libraries(rpc-server PRIVATE ggml llama)
@@ -29,13 +29,13 @@ You can also run multiple `rpc-server` instances on the same host, each with a d

 ## Usage

-On each host, build the corresponding backend with `cmake` and add `-DLLAMA_RPC=ON` to the build options.
+On each host, build the corresponding backend with `cmake` and add `-DGGML_RPC=ON` to the build options.
 For example, to build the CUDA backend with RPC support:

 ```bash
 mkdir build-rpc-cuda
 cd build-rpc-cuda
-cmake .. -DLLAMA_CUDA=ON -DLLAMA_RPC=ON
+cmake .. -DGGML_CUDA=ON -DGGML_RPC=ON
 cmake --build . --config Release
 ```

@@ -58,12 +58,12 @@ $ CUDA_VISIBLE_DEVICES=0 bin/rpc-server -p 50052
 This way you can run multiple `rpc-server` instances on the same host, each with a different CUDA device.

-On the main host build `llama.cpp` only with `-DLLAMA_RPC=ON`:
+On the main host build `llama.cpp` only with `-DGGML_RPC=ON`:

 ```bash
 mkdir build-rpc
 cd build-rpc
-cmake .. -DLLAMA_RPC=ON
+cmake .. -DGGML_RPC=ON
 cmake --build . --config Release
 ```
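Once workers and the main binary are built as above, a typical session looks roughly like the following; the host addresses and model file are placeholders, not values from this commit:

```bash
# on each worker host: start an RPC worker from the backend-enabled build
bin/rpc-server -p 50052

# on the main host: offload layers to the workers via --rpc (comma-separated list)
bin/llama-cli -m model.gguf -p "Hello" -n 64 --rpc 192.168.0.10:50052,192.168.0.11:50052 -ngl 99
```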
@@ -1,5 +0,0 @@
-set(TARGET llama-save-load-state)
-add_executable(${TARGET} save-load-state.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,51 +0,0 @@
-set(TARGET llama-server)
-option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
-set(TARGET_SRCS
-    server.cpp
-    utils.hpp
-    httplib.h
-)
-set(PUBLIC_ASSETS
-    colorthemes.css
-    style.css
-    theme-beeninorder.css
-    theme-ketivah.css
-    theme-mangotango.css
-    theme-playground.css
-    theme-polarnight.css
-    theme-snowstorm.css
-    index.html
-    index-new.html
-    index.js
-    completion.js
-    system-prompts.js
-    prompt-formats.js
-    json-schema-to-grammar.mjs
-)
-foreach(asset ${PUBLIC_ASSETS})
-    set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
-    set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
-    list(APPEND TARGET_SRCS ${output})
-    add_custom_command(
-        DEPENDS "${input}"
-        OUTPUT "${output}"
-        COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
-    )
-endforeach()
-add_executable(${TARGET} ${TARGET_SRCS})
-install(TARGETS ${TARGET} RUNTIME)
-target_compile_definitions(${TARGET} PRIVATE
-    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
-)
-target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
-if (LLAMA_SERVER_SSL)
-    find_package(OpenSSL REQUIRED)
-    target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
-    target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
-endif()
-if (WIN32)
-    TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
-endif()
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-simple)
-add_executable(${TARGET} simple.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-speculative)
-add_executable(${TARGET} speculative.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,9 +0,0 @@
-#  MIT license
-#  Copyright (C) 2024 Intel Corporation
-#  SPDX-License-Identifier: MIT
-
-set(TARGET llama-ls-sycl-device)
-add_executable(${TARGET} ls-sycl-device.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)

@@ -8,10 +8,10 @@ cd build
 source /opt/intel/oneapi/setvars.sh

 #for FP16
-#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference
+#cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON # faster for long-prompt inference

 #for FP32
-cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

 #build example/main
 #cmake --build . --config Release --target main

@@ -13,10 +13,10 @@ if %errorlevel% neq 0 goto ERROR

 :: for FP16
 :: faster for long-prompt inference
-:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+:: cmake -G "MinGW Makefiles" .. -DGGML_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON

 :: for FP32
-cmake -G "Ninja" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
+cmake -G "Ninja" .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
 if %errorlevel% neq 0 goto ERROR
 :: build example/main only
 :: make main

@@ -1,5 +0,0 @@
-set(TARGET llama-tokenize)
-add_executable(${TARGET} tokenize.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)

@@ -1,5 +0,0 @@
-set(TARGET llama-train-text-from-scratch)
-add_executable(${TARGET} train-text-from-scratch.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -8,7 +8,9 @@
 #include "ggml.h"
 #include "ggml-backend.h"
+#include "ggml-sycl/presets.hpp"
+
 #define GGML_SYCL_NAME "SYCL"
 #define GGML_SYCL_MAX_DEVICES 48

 #ifdef __cplusplus
 extern "C" {
@@ -603,7 +603,7 @@ static void on_no_fattn_vec_case(const int D) {
     if (D == 64) {
         fprintf(stderr, "Unsupported KV type combination for head_size 64.\n");
         fprintf(stderr, "By default only f16 KV cache is supported.\n");
-        fprintf(stderr, "Compile with LLAMA_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
+        fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for V cache quantization support.\n");
         GGML_ASSERT(false);
     } else if (D == 128) {
         fprintf(stderr, "Unsupported KV type combination for head_size 128.\n");

@@ -611,7 +611,7 @@ static void on_no_fattn_vec_case(const int D) {
         fprintf(stderr, "  - K == q4_0, V == q4_0, 4.50 BPV\n");
         fprintf(stderr, "  - K == q8_0, V == q8_0, 8.50 BPV\n");
         fprintf(stderr, "  - K == f16,  V == f16,  16.00 BPV\n");
-        fprintf(stderr, "Compile with LLAMA_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
+        fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n");
         GGML_ASSERT(false);
     } else {
         fprintf(stderr, "Unsupported KV type combination for head_size 256.\n");
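The renamed define in these error messages is driven by the matching build option; to actually compile the full set of fattn-vec template instances the message refers to, a CMake build would look roughly like this (sketch, assuming the upstream GGML_CUDA_FA_ALL_QUANTS option rather than the minimal-quants default used by the CMakeLists.txt above):

```bash
cmake -B build -DGGML_CUDA=ON -DGGML_CUDA_FA_ALL_QUANTS=ON
cmake --build build --config Release
```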
Some files were not shown because too many files have changed in this diff.