diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index e9809eade..09a611ec3 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -2073,7 +2073,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     int realnpredict = kcpp_params->n_predict-stopper_unused_tokens;
     float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict));
     float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2));
-    printf("\nContextLimit: %d/%d, Processing:%.2fs (%.1fms/T), Generation:%.2fs (%.1fms/T), Total:%.2fs (%.2fT/s)",current_context_tokens.size(),nctx, time1, pt1, time2, pt2, (time1 + time2), tokens_per_second);
+    printf("\nContextLimit: %d/%d, Processing:%.2fs (%.1fms/T), Generation:%.2fs (%.1fms/T), Total:%.2fs (%.1fms/T = %.2fT/s)",current_context_tokens.size(),nctx, time1, pt1, time2, pt2, (time1 + time2), (1000.0f/tokens_per_second) , tokens_per_second);
     fflush(stdout);
     output.status = 1;
     generation_finished = true;
diff --git a/llama.cpp b/llama.cpp
index 1e452e5c6..16e8906fb 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6482,6 +6482,7 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
         }
         default:
             GGML_ASSERT_CONTINUE(false);
+            return 0;
     }
 }
 
@@ -6497,6 +6498,7 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
         }
         default:
             GGML_ASSERT_CONTINUE(false);
+            return 0;
     }
 }
 
diff --git a/otherarch/llama-util.h b/otherarch/llama-util.h
index e1986eb26..948da5a88 100644
--- a/otherarch/llama-util.h
+++ b/otherarch/llama-util.h
@@ -48,9 +48,9 @@
 
 #ifdef __GNUC__
 #ifdef __MINGW32__
-__attribute__((format_old(gnu_printf, 1, 2)))
+__attribute__((format(gnu_printf, 1, 2)))
 #else
-__attribute__((format_old(printf, 1, 2)))
+__attribute__((format(printf, 1, 2)))
 #endif
 #endif
 static std::string format_old(const char * fmt, ...) {
diff --git a/otherarch/llama_v2-util.h b/otherarch/llama_v2-util.h
index 41b6df386..99f7e69d9 100644
--- a/otherarch/llama_v2-util.h
+++ b/otherarch/llama_v2-util.h
@@ -48,14 +48,6 @@
         } \
     } while (0)
 
-#ifdef __GNUC__
-#ifdef __MINGW32__
-__attribute__((format_old(gnu_printf, 1, 2)))
-#else
-__attribute__((format_old(printf, 1, 2)))
-#endif
-#endif
-
 struct llama_v2_file {
     // use FILE * so we don't have to re-open the file to mmap
     FILE * fp;
diff --git a/otherarch/rwkv_v3.cpp b/otherarch/rwkv_v3.cpp
index 5692f743c..a9982fd26 100644
--- a/otherarch/rwkv_v3.cpp
+++ b/otherarch/rwkv_v3.cpp
@@ -227,7 +227,7 @@ extern const enum rwkv_type rwkv_type_from_ggml[GGML_V3_TYPE_COUNT + 1] = {
     TYPE_COUNT, /* COUNT */
 };
 
-extern const char * rwkv_type_to_string[TYPE_COUNT + 1] = {"FP32", "FP16", "Q4_0", "Q4_1", "Q4_1_O", "Q4_2", "Q4_3", "Q5_0", "Q5_1", "Q8_0", "unknown"};
+const char * rwkv_type_to_string[TYPE_COUNT + 1] = {"FP32", "FP16", "Q4_0", "Q4_1", "Q4_1_O", "Q4_2", "Q4_3", "Q5_0", "Q5_1", "Q8_0", "unknown"};
 
 enum rwkv_type rwkv_type_from_string(const char * str) {
     for (int ord = 0; ord < TYPE_COUNT; ord++) {