From 90f5cd0f6772beeeeb0ec853f4efda85059ec623 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Wed, 30 Oct 2024 00:59:34 +0800
Subject: [PATCH] wip logprobs data
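
Records per-token sampling data in a new TopPicksData struct (the selected
token's text, id, logit and probability, plus the same data for the top 5
candidates), keeps a per-generation history of it, and exposes a new
last_logprobs() C entry point. The entry point is still a stub: it copies
the history but always returns count = 0 for now.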
---
colab.ipynb | 2 +-
expose.cpp | 8 ++++++++
expose.h | 7 +++++++
gpttype_adapter.cpp | 50 +++++++++++++++++++++++++++++----------------
koboldcpp.py | 9 ++++++++
model_adapter.h | 13 ++++++++++++
6 files changed, 70 insertions(+), 19 deletions(-)
diff --git a/colab.ipynb b/colab.ipynb
index a869c21ea..efb844490 100644
--- a/colab.ipynb
+++ b/colab.ipynb
@@ -48,7 +48,7 @@
"source": [
"#@title v-- Enter your model below and then click this to start Koboldcpp\r\n",
"\r\n",
- "Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\"]{allow-input: true}\r\n",
+ "Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/bartowski/Rocinante-12B-v1.1-GGUF/resolve/main/Rocinante-12B-v1.1-Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\"]{allow-input: true}\r\n",
"Layers = 99 #@param [99]{allow-input: true}\r\n",
"ContextSize = 4096 #@param [4096,8192] {allow-input: true}\r\n",
"FlashAttention = True #@param {type:\"boolean\"}\r\n",
diff --git a/expose.cpp b/expose.cpp
index 7093cb4f6..8635262d5 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -294,5 +294,13 @@ extern "C"
return output;
}
+ last_logprobs_outputs last_logprobs()
+ {
+ last_logprobs_outputs output;
+ std::vector<TopPicksData> toppicks = gpttype_get_top_picks_data(); //copy top picks
+ output.count = 0;
+ return output;
+ }
+
}
diff --git a/expose.h b/expose.h
index 97c4a38ab..b713fdf72 100644
--- a/expose.h
+++ b/expose.h
@@ -118,6 +118,13 @@ struct token_count_outputs
int count = 0;
int * ids; //we'll just use shared memory for this one, bit of a hack
};
+struct last_logprobs_outputs {
+ int count = 0;
+ char ** selected_token;
+ float * selected_logprob;
+ char * tokens[5];
+ float * logprobs[5];
+};
struct sd_load_model_inputs
{
const char * model_filename = nullptr;
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 9a69cba97..7f301ac8f 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -114,7 +114,7 @@ static std::vector<std::string> banned_phrases;
static std::unordered_multimap<gpt_vocab::id, std::vector<gpt_vocab::id>> dry_sequence_breakers; // Multi-mapping from first token of sequence to tail of sequence (tail is empty for a single token)
static std::vector<int> dry_repeat_count; // Indexed as last_n_tokens
static std::unordered_map<gpt_vocab::id, int> dry_max_token_repeat;
-static std::vector<llama_token_data> top_picks;
+static std::vector<TopPicksData> top_picks_history;
static int remaining_tokens = 0;
static int stopper_unused_tokens = 0;
static std::mutex concat_output_mtx;
@@ -587,7 +587,8 @@ llama_token sample_token(llama_token_data_array * candidates, std::mt19937 & rng)
sample_softmax(candidates);
std::vector probs;
probs.reserve(candidates->size);
- top_picks.clear();
+ TopPicksData newpick;
+
for (size_t i = 0; i < candidates->size; ++i) {
probs.push_back(candidates->data[i].p);
}
@@ -595,18 +596,20 @@ llama_token sample_token(llama_token_data_array * candidates, std::mt19937 & rng
std::discrete_distribution<> dist(probs.begin(), probs.end());
int idx = dist(rng);
- if(debugmode==1)
+ newpick.selected_token = FileFormatTokenizeID(candidates->data[idx].id, file_format, true);
+ newpick.selected_logprob = candidates->data[idx].logit;
+ newpick.selected_probability = candidates->data[idx].p;
+ newpick.selected_tokenid = candidates->data[idx].id;
+ for (size_t i = 0; (i < candidates->size && i<5); ++i)
{
- top_picks.push_back(candidates->data[idx]);
- for (size_t i = 0; (i < candidates->size && i<4); ++i)
- {
- if(i!=idx)
- {
- top_picks.push_back(candidates->data[i]);
- }
- }
+ newpick.tokens.push_back(FileFormatTokenizeID(candidates->data[i].id, file_format, true));
+ newpick.logprobs.push_back(candidates->data[i].logit);
+ newpick.p.push_back(candidates->data[i].p);
+ newpick.tokenid.push_back(candidates->data[i].id);
}
+ top_picks_history.push_back(newpick);
+
llama_token result = candidates->data[idx].id;
return result;
}
@@ -2422,6 +2425,11 @@ const std::string & gpttype_get_pending_output()
return concat_output_reader_copy_poll;
}
+const std::vector<TopPicksData> gpttype_get_top_picks_data()
+{
+ return top_picks_history;
+}
+
bool VecContainsIntVal(const std::vector<int> & vec, const int val)
{
for (const auto &matched : vec)
@@ -2484,6 +2492,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
dry_repeat_count.clear();
dry_sequence_breakers.clear();
dry_max_token_repeat.clear();
+ top_picks_history.clear();
double time0 = 0, time1 = 0, time2 = 0;
timer_start();
@@ -3271,20 +3280,25 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
{
printf("\rGenerating (%d / %d tokens)", (kcpp_data->n_predict - remaining_tokens), kcpp_data->n_predict);
}
- if(debugmode==1 && top_picks.size()>0)
+ if(debugmode==1 && top_picks_history.size()>0)
{
printf(" [");
bool firstloop = true;
- for (auto & pick : top_picks)
+ TopPicksData toppick = top_picks_history[top_picks_history.size()-1];
+ std::string topstr = toppick.selected_token;
+ ::utreplace(topstr, "\n", "\\n");
+ printf("(%s %.2f%%)", RemoveBell(topstr).c_str(), toppick.selected_probability*100);
+ int maxtoshow = (toppick.tokenid.size()>4?4:toppick.tokenid.size());
+ for (int i=0;i<maxtoshow;++i)
+ {
+ if(toppick.tokenid[i]!=toppick.selected_tokenid)
+ {
+ std::string tokenizedstr = toppick.tokens[i];
+ ::utreplace(tokenizedstr, "\n", "\\n");
+ printf(" %s(%.2f%%)", RemoveBell(tokenizedstr).c_str(), toppick.p[i]*100);
+ }
+ }
+ printf("]\n");
}
diff --git a/model_adapter.h b/model_adapter.h
--- a/model_adapter.h
+++ b/model_adapter.h
@@ -69,6 +69,18 @@
+struct TopPicksData
+{
+ std::string selected_token;
+ int32_t selected_tokenid = 0;
+ float selected_logprob = 0;
+ float selected_probability = 0;
+ std::vector<std::string> tokens;
+ std::vector<int32_t> tokenid;
+ std::vector<float> logprobs;
+ std::vector<float> p;
+};
+
enum ModelLoadResult
{
FAIL = 0,
@@ -81,6 +93,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs);
bool gpttype_generate_abort();
const std::string & gpttype_get_pending_output();
std::vector<int> gpttype_get_token_arr(const std::string & input, bool addbos);
+const std::vector<TopPicksData> gpttype_get_top_picks_data();
bool sdtype_load_model(const sd_load_model_inputs inputs);
sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs);
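
Note on the stub: last_logprobs() currently copies top_picks_history but
always returns count = 0, so callers get an empty result until the copy-out
is finished. For reference, here is a minimal ctypes sketch of what the
Python side would need to declare to consume it. The field layout is derived
field-for-field from last_logprobs_outputs in expose.h above, but the
"handle" name and the binding lines are assumptions for illustration, not
part of this patch.

    import ctypes

    # ctypes mirror of last_logprobs_outputs (see expose.h above);
    # layout must match the C struct exactly, field for field.
    class last_logprobs_outputs(ctypes.Structure):
        _fields_ = [("count", ctypes.c_int),
                    ("selected_token", ctypes.POINTER(ctypes.c_char_p)),   # char **
                    ("selected_logprob", ctypes.POINTER(ctypes.c_float)),  # float *
                    ("tokens", ctypes.c_char_p * 5),                       # char * [5]
                    ("logprobs", ctypes.POINTER(ctypes.c_float) * 5)]      # float * [5]

    # assumed usage against the loaded shared library ("handle" is hypothetical):
    # handle.last_logprobs.restype = last_logprobs_outputs
    # out = handle.last_logprobs()
    # print(out.count)  # 0 for now, since the C side is still a stub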