mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 00:54:41 +00:00
Merge commit '9b61acf060
' into concedo_experimental
# Conflicts: # Makefile # docs/multimodal/MobileVLM.md # docs/multimodal/glmedge.md # docs/multimodal/llava.md # docs/multimodal/minicpmo2.6.md # docs/multimodal/minicpmv2.5.md # docs/multimodal/minicpmv2.6.md # requirements/requirements-all.txt # tools/mtmd/CMakeLists.txt # tools/mtmd/README.md # tools/mtmd/android/adb_run.sh # tools/mtmd/android/build_64.sh # tools/mtmd/clip-quantize-cli.cpp
This commit is contained in:
commit
0fa435b2a6
29 changed files with 17 additions and 18 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -51,7 +51,6 @@ models-mnt
|
|||
/infill
|
||||
/libllama.so
|
||||
/llama-bench
|
||||
/llava-cli
|
||||
/lookahead
|
||||
/lookup
|
||||
/main
|
||||
|
|
|
@ -470,10 +470,10 @@ add_library(common2
|
|||
common/common.h
|
||||
common/sampling.cpp
|
||||
common/sampling.h
|
||||
tools/llava/llava.cpp
|
||||
tools/llava/llava.h
|
||||
tools/llava/clip.cpp
|
||||
tools/llava/clip.h
|
||||
tools/mtmd/llava.cpp
|
||||
tools/mtmd/llava.h
|
||||
tools/mtmd/clip.cpp
|
||||
tools/mtmd/clip.h
|
||||
src/unicode.h
|
||||
src/unicode.cpp
|
||||
src/unicode-data.cpp
|
||||
|
|
12
Makefile
12
Makefile
|
@ -511,7 +511,7 @@ sgemm_failsafe.o: ggml/src/ggml-cpu/llamafile/sgemm.cpp ggml/src/ggml-cpu/llamaf
|
|||
#there's no intrinsics or special gpu ops used here, so we can have a universal object
|
||||
ggml-alloc.o: ggml/src/ggml-alloc.c ggml/include/ggml.h ggml/include/ggml-alloc.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
llava.o: tools/llava/llava.cpp tools/llava/llava.h
|
||||
llava.o: tools/mtmd/llava.cpp tools/mtmd/llava.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
unicode.o: src/unicode.cpp src/unicode.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
@ -541,11 +541,11 @@ ggml-backend-reg_vulkan.o: ggml/src/ggml-backend-reg.cpp ggml/src/ggml-backend-i
|
|||
$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
||||
ggml-backend-reg_cublas.o: ggml/src/ggml-backend-reg.cpp ggml/src/ggml-backend-impl.h ggml/include/ggml.h ggml/include/ggml-backend.h ggml/include/ggml-cpu.h
|
||||
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||
llavaclip_default.o: tools/llava/clip.cpp tools/llava/clip.h
|
||||
llavaclip_default.o: tools/mtmd/clip.cpp tools/mtmd/clip.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
llavaclip_cublas.o: tools/llava/clip.cpp tools/llava/clip.h
|
||||
llavaclip_cublas.o: tools/mtmd/clip.cpp tools/mtmd/clip.h
|
||||
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||
llavaclip_vulkan.o: tools/llava/clip.cpp tools/llava/clip.h
|
||||
llavaclip_vulkan.o: tools/mtmd/clip.cpp tools/mtmd/clip.h
|
||||
$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
||||
|
||||
#this is only used for accelerate
|
||||
|
@ -673,7 +673,7 @@ ttsmain: tools/tts/tts.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-op
|
|||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
gguf-split: tools/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
mtmd-cli: tools/llava/mtmd-cli.cpp tools/llava/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||
mtmd-cli: tools/mtmd/mtmd-cli.cpp tools/mtmd/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
ggml/src/ggml-vulkan-shaders.cpp:
|
||||
|
@ -827,7 +827,7 @@ quantize_neox: otherarch/tools/neox_quantize.cpp otherarch/tools/common-ggml.cpp
|
|||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_mpt: otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_clip: tools/llava/clip.cpp tools/llava/clip.h tools/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
|
||||
quantize_clip: tools/mtmd/clip.cpp tools/mtmd/clip.h tools/quantclip.cpp ggml_v3.o ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL)
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
|
||||
#window simple clinfo
|
||||
|
|
|
@ -2212,14 +2212,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
|
||||
add_opt(common_arg(
|
||||
{"--mmproj"}, "FILE",
|
||||
"path to a multimodal projector file. see tools/llava/README.md",
|
||||
"path to a multimodal projector file. see tools/mtmd/README.md",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.mmproj.path = value;
|
||||
}
|
||||
).set_examples(mmproj_examples));
|
||||
add_opt(common_arg(
|
||||
{"--mmproj-url"}, "URL",
|
||||
"URL to a multimodal projector file. see tools/llava/README.md",
|
||||
"URL to a multimodal projector file. see tools/mtmd/README.md",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.mmproj.url = value;
|
||||
}
|
||||
|
|
|
@ -336,7 +336,7 @@ struct common_params {
|
|||
|
||||
common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
|
||||
|
||||
// multimodal models (see tools/llava)
|
||||
// multimodal models (see tools/mtmd)
|
||||
struct common_params_model mmproj;
|
||||
bool mmproj_use_gpu = true; // use GPU for multimodal model
|
||||
bool no_mmproj = false; // explicitly disable multimodal model
|
||||
|
|
|
@ -40,8 +40,8 @@
|
|||
#include "neox_v2.cpp"
|
||||
#include "neox_v3.cpp"
|
||||
#include "mpt_v3.cpp"
|
||||
#include "tools/llava/clip.h"
|
||||
#include "tools/llava/llava.h"
|
||||
#include "tools/mtmd/clip.h"
|
||||
#include "tools/mtmd/llava.h"
|
||||
#include "common/common.h"
|
||||
|
||||
//const
|
||||
|
|
|
@ -52,7 +52,7 @@ logit_bias_max = 512
|
|||
dry_seq_break_max = 128
|
||||
|
||||
# global vars
|
||||
KcppVersion = "1.90.2"
|
||||
KcppVersion = "1.91"
|
||||
showdebug = True
|
||||
kcpp_instance = None #global running instance
|
||||
global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False}
|
||||
|
|
|
@ -27,7 +27,7 @@ else()
|
|||
add_subdirectory(run)
|
||||
add_subdirectory(tokenize)
|
||||
add_subdirectory(tts)
|
||||
add_subdirectory(llava)
|
||||
add_subdirectory(mtmd)
|
||||
if (GGML_RPC)
|
||||
add_subdirectory(rpc)
|
||||
endif()
|
||||
|
|
Before Width: | Height: | Size: 121 KiB After Width: | Height: | Size: 121 KiB |
Loading…
Add table
Add a link
Reference in a new issue