diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index e9809eade..09a611ec3 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -2073,7 +2073,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     int realnpredict = kcpp_params->n_predict-stopper_unused_tokens;
     float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict));
     float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2));
-    printf("\nContextLimit: %d/%d, Processing:%.2fs (%.1fms/T), Generation:%.2fs (%.1fms/T), Total:%.2fs (%.2fT/s)",current_context_tokens.size(),nctx, time1, pt1, time2, pt2, (time1 + time2), tokens_per_second);
+    printf("\nContextLimit: %d/%d, Processing:%.2fs (%.1fms/T), Generation:%.2fs (%.1fms/T), Total:%.2fs (%.1fms/T = %.2fT/s)",current_context_tokens.size(),nctx, time1, pt1, time2, pt2, (time1 + time2), (1000.0f/tokens_per_second) , tokens_per_second);
     fflush(stdout);
     output.status = 1;
     generation_finished = true;
diff --git a/llama.cpp b/llama.cpp
index 1e452e5c6..16e8906fb 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6482,6 +6482,7 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
         }
         default:
             GGML_ASSERT_CONTINUE(false);
+            return 0;
     }
 }
 
@@ -6497,6 +6498,7 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
         }
         default:
             GGML_ASSERT_CONTINUE(false);
+            return 0;
     }
 }
 
diff --git a/otherarch/llama-util.h b/otherarch/llama-util.h
index e1986eb26..948da5a88 100644
--- a/otherarch/llama-util.h
+++ b/otherarch/llama-util.h
@@ -48,9 +48,9 @@
 
 #ifdef __GNUC__
 #ifdef __MINGW32__
-__attribute__((format_old(gnu_printf, 1, 2)))
+__attribute__((format(gnu_printf, 1, 2)))
 #else
-__attribute__((format_old(printf, 1, 2)))
+__attribute__((format(printf, 1, 2)))
 #endif
 #endif
 static std::string format_old(const char * fmt, ...) {
diff --git a/otherarch/llama_v2-util.h b/otherarch/llama_v2-util.h
index 41b6df386..99f7e69d9 100644
--- a/otherarch/llama_v2-util.h
+++ b/otherarch/llama_v2-util.h
@@ -48,14 +48,6 @@
         } \
     } while (0)
 
-#ifdef __GNUC__
-#ifdef __MINGW32__
-__attribute__((format_old(gnu_printf, 1, 2)))
-#else
-__attribute__((format_old(printf, 1, 2)))
-#endif
-#endif
-
 struct llama_v2_file {
     // use FILE * so we don't have to re-open the file to mmap
     FILE * fp;
diff --git a/otherarch/rwkv_v3.cpp b/otherarch/rwkv_v3.cpp
index 5692f743c..a9982fd26 100644
--- a/otherarch/rwkv_v3.cpp
+++ b/otherarch/rwkv_v3.cpp
@@ -227,7 +227,7 @@ extern const enum rwkv_type rwkv_type_from_ggml[GGML_V3_TYPE_COUNT + 1] = {
     TYPE_COUNT, /* COUNT */
 };
 
-extern const char * rwkv_type_to_string[TYPE_COUNT + 1] = {"FP32", "FP16", "Q4_0", "Q4_1", "Q4_1_O", "Q4_2", "Q4_3", "Q5_0", "Q5_1", "Q8_0", "unknown"};
+const char * rwkv_type_to_string[TYPE_COUNT + 1] = {"FP32", "FP16", "Q4_0", "Q4_1", "Q4_1_O", "Q4_2", "Q4_3", "Q5_0", "Q5_1", "Q8_0", "unknown"};
 
 enum rwkv_type rwkv_type_from_string(const char * str) {
     for (int ord = 0; ord < TYPE_COUNT; ord++) {