From 1ef41c2124ebfcd9ca3447f60e57ed287129486d Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 13 Mar 2025 14:58:08 +0800 Subject: [PATCH] streamline output console log (+1 squashed commits) Squashed commits: [ca474bdd] streamline output console log --- examples/llava/clip.cpp | 5 +++++ gpttype_adapter.cpp | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index d8022edb4..82d71a314 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -3171,6 +3171,11 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i { quantize = false; } + // //temp fix for gemma3 + // if(name.find("ffn_up.weight") != std::string::npos) + // { + // quantize = false; + // } } if (quantize) { diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index fbfd59132..48ca9f32b 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -3993,7 +3993,14 @@ generation_outputs gpttype_generate(const generation_inputs inputs) float pt2 = (time2*1000.0/(realnpredict<=0?1:realnpredict)); float ts2 = (1000.0/pt2); float tokens_per_second = (realnpredict <= 0 ? 0 : realnpredict / (time1 + time2)); - printf("\n[%s] CtxLimit:%d/%d, Amt:%d/%d, Init:%.2fs, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)",get_timestamp_str().c_str(),(int)current_context_tokens.size(),(int)nctx, realnpredict, kcpp_data->n_predict, time0, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second); + if(debugmode==1) + { + printf("\n[%s] CtxLimit:%d/%d, Amt:%d/%d, Init:%.2fs, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)",get_timestamp_str().c_str(),(int)current_context_tokens.size(),(int)nctx, realnpredict, kcpp_data->n_predict, time0, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second); + } + else + { + printf("\n[%s] CtxLimit:%d/%d, Amt:%d/%d, Init:%.2fs, Process:%.2fs (%.2fT/s), Generate:%.2fs (%.2fT/s), Total:%.2fs",get_timestamp_str().c_str(),(int)current_context_tokens.size(),(int)nctx, realnpredict, kcpp_data->n_predict, time0, time1, ts1, time2, ts2, (time1 + time2)); + } if(debugmode==1 && !is_quiet && (draft_successes+draft_failures)>0) { printf("\n(Draft Results - Success:%d, Failure:%d)",draft_successes,draft_failures);