mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-22 03:10:03 +00:00
added a simple program to do quantization for clip models
This commit is contained in:
parent
2dc647f892
commit
227f59dab6
2 changed files with 35 additions and 3 deletions
8
Makefile
8
Makefile
|
|
@ -1,5 +1,5 @@
|
|||
default: koboldcpp_default koboldcpp_failsafe koboldcpp_openblas koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2
|
||||
tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt
|
||||
tools: quantize_gpt2 quantize_gptj quantize_gguf quantize_neox quantize_mpt quantize_clip
|
||||
dev: koboldcpp_openblas
|
||||
dev2: koboldcpp_clblast
|
||||
|
||||
|
|
@ -507,7 +507,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
|
|||
$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
||||
|
||||
clean:
|
||||
rm -vf *.o main sdmain quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
|
||||
rm -vf *.o main sdmain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
|
||||
|
||||
# useful tools
|
||||
main: examples/main/main.cpp common/sampling.cpp build-info.h ggml.o ggml-quants.o ggml-alloc.o ggml-backend.o llama.o common.o console.o grammar-parser.o $(OBJS)
|
||||
|
|
@ -600,7 +600,7 @@ koboldcpp_vulkan_noavx2:
|
|||
endif
|
||||
|
||||
# tools
|
||||
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o ggml-quants.o ggml-alloc.o ggml-backend.o
|
||||
quantize_gguf: examples/quantize/quantize.cpp ggml.o llama.o ggml-quants.o ggml-alloc.o ggml-backend.o
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_gptj: ggml.o llama.o ggml-quants.o ggml-alloc.o ggml-backend.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
|
|
@ -610,6 +610,8 @@ quantize_neox: ggml.o llama.o ggml-quants.o ggml-alloc.o ggml-backend.o otherarc
|
|||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_mpt: ggml.o llama.o ggml-quants.o ggml-alloc.o ggml-backend.o otherarch/tools/mpt_quantize.cpp otherarch/tools/common-ggml.cpp
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_clip: ggml.o llama.o ggml-quants.o ggml-alloc.o ggml-backend.o examples/llava/clip.cpp examples/llava/clip.h examples/llava/quantclip.cpp
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
|
||||
#window simple clinfo
|
||||
simpleclinfo: simpleclinfo.cpp
|
||||
|
|
|
|||
30
examples/llava/quantclip.cpp
Normal file
30
examples/llava/quantclip.cpp
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#include "ggml.h"
|
||||
#include "common.h"
|
||||
#include "clip.h"
|
||||
#include "llava.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include "base64.hpp"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
ggml_time_init();
|
||||
|
||||
if (argc != 3) {
|
||||
fprintf(stderr, "usage: %s mmproj-f16.gguf output-mmproj-quantized.gguf\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const std::string fname_inp = argv[1];
|
||||
const std::string fname_out = argv[2];
|
||||
|
||||
printf("quantizing mmproj clip model to q4_1... ");
|
||||
clip_model_quantize(fname_inp.c_str(), fname_out.c_str(), GGML_TYPE_Q4_1);
|
||||
printf("done\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue