diff --git a/expose.cpp b/expose.cpp index 8635262d5..efa1c8bb3 100644 --- a/expose.cpp +++ b/expose.cpp @@ -294,11 +294,29 @@ extern "C" return output; } + static std::vector last_logprob_toppicks; + static std::vector last_logprob_items; last_logprobs_outputs last_logprobs() { last_logprobs_outputs output; - std::vector toppicks = gpttype_get_top_picks_data(); //copy top picks - output.count = 0; + last_logprob_items.clear(); + last_logprob_toppicks.clear(); + last_logprob_toppicks = gpttype_get_top_picks_data(); //copy top picks + for(int i=0;idata[idx].id, file_format, true); - newpick.selected_logprob = candidates->data[idx].logit; + newpick.selected_logprob = logf(candidates->data[idx].p); newpick.selected_probability = candidates->data[idx].p; newpick.selected_tokenid = candidates->data[idx].id; - for (size_t i = 0; (i < candidates->size && i<5); ++i) + for (size_t i = 0; (i < candidates->size && idata[i].id, file_format, true)); - newpick.logprobs.push_back(candidates->data[i].logit); + newpick.logprobs.push_back(logf(candidates->data[i].p)); newpick.p.push_back(candidates->data[i].p); newpick.tokenid.push_back(candidates->data[i].id); } @@ -2467,6 +2467,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) printf("\nWarning: KCPP text generation not initialized!\n"); output.text = nullptr; output.status = 0; + output.prompt_tokens = output.completion_tokens = 0; output.stopreason = stop_reason::INVALID; generation_finished = true; return output; @@ -3142,6 +3143,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) fprintf(stderr, "\nFailed to predict at %d! Check your context buffer sizes!\n",n_past); output.text = nullptr; output.status = 0; + output.prompt_tokens = output.completion_tokens = 0; output.stopreason = stop_reason::INVALID; generation_finished = true; return output; @@ -3471,6 +3473,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) fprintf(stderr, "\nFailed to eval llava image at %d!\n",n_past); output.text = nullptr; output.status = 0; + output.prompt_tokens = output.completion_tokens = 0; output.stopreason = stop_reason::INVALID; generation_finished = true; return output; @@ -3482,6 +3485,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) fprintf(stderr, "\nLLAVA image tokens mismatch at %d! (%d vs %d tokens)\n",n_past,llavatokenscounted,llavatokensevaled); output.text = nullptr; output.status = 0; + output.prompt_tokens = output.completion_tokens = 0; output.stopreason = stop_reason::INVALID; generation_finished = true; return output; @@ -3534,6 +3538,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs) printf("\nCtxLimit:%d/%d, Amt:%d/%d, Init:%.2fs, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)",(int)current_context_tokens.size(),(int)nctx, realnpredict, kcpp_data->n_predict, time0, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second); fflush(stdout); output.status = 1; + int finaltokcount = (int)current_context_tokens.size()-realnpredict; + output.prompt_tokens = (finaltokcount<0?0:finaltokcount); + output.completion_tokens = realnpredict; output.stopreason = last_stop_reason; last_eval_time = pt2; last_process_time = pt1; diff --git a/klite.embd b/klite.embd index 582171caf..ac3b7175d 100644 --- a/klite.embd +++ b/klite.embd @@ -12,7 +12,7 @@ Current version indicated by LITEVER below. -->