From 90f5cd0f6772beeeeb0ec853f4efda85059ec623 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Wed, 30 Oct 2024 00:59:34 +0800 Subject: [PATCH] wip logprobs data --- colab.ipynb | 2 +- expose.cpp | 8 ++++++++ expose.h | 7 +++++++ gpttype_adapter.cpp | 50 +++++++++++++++++++++++++++++---------------- koboldcpp.py | 9 ++++++++ model_adapter.h | 13 ++++++++++++ 6 files changed, 70 insertions(+), 19 deletions(-) diff --git a/colab.ipynb b/colab.ipynb index a869c21ea..efb844490 100644 --- a/colab.ipynb +++ b/colab.ipynb @@ -48,7 +48,7 @@ "source": [ "#@title v-- Enter your model below and then click this to start Koboldcpp\r\n", "\r\n", - "Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\"]{allow-input: true}\r\n", + "Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/bartowski/Rocinante-12B-v1.1-GGUF/resolve/main/Rocinante-12B-v1.1-Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\"]{allow-input: true}\r\n", "Layers = 99 #@param [99]{allow-input: true}\r\n", "ContextSize = 4096 #@param [4096,8192] {allow-input: true}\r\n", "FlashAttention = True #@param {type:\"boolean\"}\r\n", diff --git a/expose.cpp b/expose.cpp index 7093cb4f6..8635262d5 100644 --- a/expose.cpp +++ b/expose.cpp @@ -294,5 +294,13 @@ extern "C" return output; } + last_logprobs_outputs last_logprobs() + { + last_logprobs_outputs output; + std::vector toppicks = gpttype_get_top_picks_data(); //copy top picks + output.count = 0; + return output; + } + } diff --git a/expose.h b/expose.h index 97c4a38ab..b713fdf72 100644 --- a/expose.h +++ b/expose.h @@ -118,6 +118,13 @@ struct token_count_outputs int count = 0; int * ids; //we'll just use shared memory for this one, bit of a hack }; +struct last_logprobs_outputs { + int count = 0; + char ** selected_token; + float * selected_logprob; + char * tokens[5]; + float * logprobs[5]; +}; struct sd_load_model_inputs { const char * model_filename = nullptr; diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 9a69cba97..7f301ac8f 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -114,7 +114,7 @@ static std::vector banned_phrases; static std::unordered_multimap> dry_sequence_breakers; // Multi-mapping from first token of sequence to tail of sequence (tail is empty for a single token) static std::vector dry_repeat_count; // Indexed as last_n_tokens static std::unordered_map dry_max_token_repeat; -static std::vector top_picks; +static std::vector top_picks_history; static int remaining_tokens = 0; static int stopper_unused_tokens = 0; static std::mutex concat_output_mtx; @@ -587,7 +587,8 @@ llama_token sample_token(llama_token_data_array * candidates, std::mt19937 & rng sample_softmax(candidates); std::vector probs; probs.reserve(candidates->size); - top_picks.clear(); + TopPicksData newpick; + for (size_t i = 0; i < candidates->size; ++i) { probs.push_back(candidates->data[i].p); } @@ -595,18 +596,20 @@ llama_token sample_token(llama_token_data_array * candidates, std::mt19937 & rng std::discrete_distribution<> dist(probs.begin(), probs.end()); int idx = dist(rng); - if(debugmode==1) + newpick.selected_token = FileFormatTokenizeID(candidates->data[idx].id, file_format, true); + newpick.selected_logprob = candidates->data[idx].logit; + newpick.selected_probability = candidates->data[idx].p; + newpick.selected_tokenid = candidates->data[idx].id; + for (size_t i = 0; (i < candidates->size && i<5); ++i) { - top_picks.push_back(candidates->data[idx]); - for (size_t i = 0; (i < candidates->size && i<4); ++i) - { - if(i!=idx) - { - top_picks.push_back(candidates->data[i]); - } - } + newpick.tokens.push_back(FileFormatTokenizeID(candidates->data[i].id, file_format, true)); + newpick.logprobs.push_back(candidates->data[i].logit); + newpick.p.push_back(candidates->data[i].p); + newpick.tokenid.push_back(candidates->data[i].id); } + top_picks_history.push_back(newpick); + llama_token result = candidates->data[idx].id; return result; } @@ -2422,6 +2425,11 @@ const std::string & gpttype_get_pending_output() return concat_output_reader_copy_poll; } +const std::vector gpttype_get_top_picks_data() +{ + return top_picks_history; +} + bool VecContainsIntVal(const std::vector & vec, const int val) { for (const auto &matched : vec) @@ -2484,6 +2492,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) dry_repeat_count.clear(); dry_sequence_breakers.clear(); dry_max_token_repeat.clear(); + top_picks_history.clear(); double time0 = 0, time1 = 0, time2 = 0; timer_start(); @@ -3271,20 +3280,25 @@ generation_outputs gpttype_generate(const generation_inputs inputs) { printf("\rGenerating (%d / %d tokens)", (kcpp_data->n_predict - remaining_tokens), kcpp_data->n_predict); } - if(debugmode==1 && top_picks.size()>0) + if(debugmode==1 && top_picks_history.size()>0) { printf(" ["); bool firstloop = true; - for (auto & pick : top_picks) + TopPicksData toppick = top_picks_history[top_picks_history.size()-1]; + std::string topstr = toppick.selected_token; + ::utreplace(topstr, "\n", "\\n"); + printf("(%s %.2f%%)", RemoveBell(topstr).c_str(), toppick.selected_probability*100); + int maxtoshow = (toppick.tokenid.size()>4?4:toppick.tokenid.size()); + for (int i=0;i tokens; + std::vector tokenid; + std::vector logprobs; + std::vector p; +}; + enum ModelLoadResult { FAIL = 0, @@ -81,6 +93,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs); bool gpttype_generate_abort(); const std::string & gpttype_get_pending_output(); std::vector gpttype_get_token_arr(const std::string & input, bool addbos); +const std::vector gpttype_get_top_picks_data(); bool sdtype_load_model(const sd_load_model_inputs inputs); sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs);