mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Merge branch 'master' into concedo_experimental
# Conflicts: # CMakeLists.txt # Makefile # README.md
This commit is contained in:
commit
cf94340dfc
4 changed files with 148 additions and 77 deletions
17
llama.cpp
17
llama.cpp
|
@ -1620,7 +1620,7 @@ static bool llama_eval_internal(
|
|||
model.layers[il].w1,
|
||||
cur);
|
||||
offload_func(cur);
|
||||
ggml_set_name(cur, "result_w2");
|
||||
ggml_set_name(cur, "result_w1");
|
||||
|
||||
// SILU activation
|
||||
cur = ggml_silu(ctx0, cur);
|
||||
|
@ -1657,11 +1657,7 @@ static bool llama_eval_internal(
|
|||
{
|
||||
cur = ggml_rms_norm(ctx0, inpL);
|
||||
offload_func_nr(cur);
|
||||
ggml_set_name(cur, "rms_norm_inpL");
|
||||
|
||||
cur = ggml_rms_norm(ctx0, cur);
|
||||
offload_func_nr(cur);
|
||||
ggml_set_name(cur, "rms_norm_after");
|
||||
ggml_set_name(cur, "rms_norm_2");
|
||||
|
||||
// cur = cur*norm(broadcasted)
|
||||
cur = ggml_mul(ctx0, cur, model.norm);
|
||||
|
@ -3471,9 +3467,12 @@ void llama_print_timings(struct llama_context * ctx) {
|
|||
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us / 1000.0);
|
||||
fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample);
|
||||
fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval);
|
||||
fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval);
|
||||
fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
__func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample, 1e6 / ctx->t_sample_us * n_sample);
|
||||
fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
__func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval, 1e6 / ctx->t_p_eval_us * n_p_eval);
|
||||
fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
__func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval, 1e6 / ctx->t_eval_us * n_eval);
|
||||
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue