mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
timing for init step, clip for vulkan
This commit is contained in:
parent
6200b6d64e
commit
1a7ecd55e6
2 changed files with 9 additions and 4 deletions
6
Makefile
6
Makefile
|
@ -470,6 +470,8 @@ llavaclip_default.o: examples/llava/clip.cpp examples/llava/clip.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h
|
llavaclip_cublas.o: examples/llava/clip.cpp examples/llava/clip.h
|
||||||
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@
|
||||||
|
llavaclip_vulkan.o: examples/llava/clip.cpp examples/llava/clip.h
|
||||||
|
$(CXX) $(CXXFLAGS) $(VULKAN_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#this is only used for openblas and accelerate
|
#this is only used for openblas and accelerate
|
||||||
ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h
|
ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h
|
||||||
|
@ -663,10 +665,10 @@ koboldcpp_hipblas:
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef VULKAN_BUILD
|
ifdef VULKAN_BUILD
|
||||||
koboldcpp_vulkan: ggml_v4_vulkan.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter_vulkan.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_vulkan.o $(OBJS_FULL) $(OBJS)
|
koboldcpp_vulkan: ggml_v4_vulkan.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o gpttype_adapter_vulkan.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o $(OBJS_FULL) $(OBJS)
|
||||||
$(VULKAN_BUILD)
|
$(VULKAN_BUILD)
|
||||||
ifdef NOAVX2_BUILD
|
ifdef NOAVX2_BUILD
|
||||||
koboldcpp_vulkan_noavx2: ggml_v4_vulkan_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_vulkan_noavx2.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_default.o llava.o ggml-backend_vulkan.o $(OBJS_SIMPLE) $(OBJS)
|
koboldcpp_vulkan_noavx2: ggml_v4_vulkan_noavx2.o ggml_v3_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_vulkan_noavx2.o ggml-vulkan.o sdcpp_vulkan.o whispercpp_default.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o $(OBJS_SIMPLE) $(OBJS)
|
||||||
$(VULKAN_BUILD)
|
$(VULKAN_BUILD)
|
||||||
else
|
else
|
||||||
koboldcpp_vulkan_noavx2:
|
koboldcpp_vulkan_noavx2:
|
||||||
|
|
|
@ -1961,6 +1961,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
dry_sequence_breakers.clear();
|
dry_sequence_breakers.clear();
|
||||||
dry_max_token_repeat.clear();
|
dry_max_token_repeat.clear();
|
||||||
|
|
||||||
|
double time0 = 0, time1 = 0, time2 = 0;
|
||||||
|
timer_start();
|
||||||
|
|
||||||
for(int x=0;x<stop_token_max;++x)
|
for(int x=0;x<stop_token_max;++x)
|
||||||
{
|
{
|
||||||
std::string stopper = inputs.stop_sequence[x];
|
std::string stopper = inputs.stop_sequence[x];
|
||||||
|
@ -2421,8 +2424,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
bool startedsampling = false;
|
bool startedsampling = false;
|
||||||
bool v3_use_scratch = true; //for normal inference always use scratch
|
bool v3_use_scratch = true; //for normal inference always use scratch
|
||||||
|
|
||||||
|
time0 = timer_check();
|
||||||
timer_start();
|
timer_start();
|
||||||
double time1 = 0, time2 = 0;
|
|
||||||
|
|
||||||
if(file_format == FileFormat::RWKV_1 || file_format==FileFormat::RWKV_2)
|
if(file_format == FileFormat::RWKV_1 || file_format==FileFormat::RWKV_2)
|
||||||
{
|
{
|
||||||
|
@ -2872,7 +2875,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict));
|
float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict));
|
||||||
float ts2 = (1000.0/pt2);
|
float ts2 = (1000.0/pt2);
|
||||||
float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2));
|
float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2));
|
||||||
printf("\nCtxLimit:%d/%d, Amt:%d/%d, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)",(int)current_context_tokens.size(),(int)nctx, realnpredict, kcpp_params->n_predict, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second);
|
printf("\nCtxLimit:%d/%d, Amt:%d/%d, Init:%.2fs, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)",(int)current_context_tokens.size(),(int)nctx, realnpredict, kcpp_params->n_predict, time0, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
output.status = 1;
|
output.status = 1;
|
||||||
output.stopreason = last_stop_reason;
|
output.stopreason = last_stop_reason;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue