Mirror of https://github.com/LostRuins/koboldcpp.git
wip logprobs data
This commit is contained in:
parent bd05efd648
commit 90f5cd0f67

6 changed files with 70 additions and 19 deletions
@@ -48,7 +48,7 @@
 "source": [
 "#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\r\n",
 "\r\n",
-"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\"]{allow-input: true}\r\n",
+"Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf\",\"https://huggingface.co/KoboldAI/LLaMA2-13B-Estopia-GGUF/resolve/main/LLaMA2-13B-Estopia.Q4_K_S.gguf\",\"https://huggingface.co/mradermacher/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/mini-magnum-12b-v1.1-GGUF/resolve/main/mini-magnum-12b-v1.1.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/bartowski/Rocinante-12B-v1.1-GGUF/resolve/main/Rocinante-12B-v1.1-Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/MistRP-Airoboros-7B-GGUF/resolve/main/mistrp-airoboros-7b.Q4_K_S.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/concedo/KobbleTinyV2-1.1B-GGUF/resolve/main/KobbleTiny-Q4_K.gguf\",\"https://huggingface.co/grimjim/kukulemon-7B-GGUF/resolve/main/kukulemon-7B.Q8_0.gguf\",\"https://huggingface.co/mradermacher/LemonKunoichiWizardV3-GGUF/resolve/main/LemonKunoichiWizardV3.Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix/resolve/main/Kunoichi-DPO-v2-7B-Q4_K_M-imatrix.gguf\",\"https://huggingface.co/mradermacher/L3-8B-Stheno-v3.2-i1-GGUF/resolve/main/L3-8B-Stheno-v3.2.i1-Q4_K_M.gguf\",\"https://huggingface.co/Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix/resolve/main/v2-Llama-3-Lumimaid-8B-v0.1-OAS-Q4_K_M-imat.gguf\",\"https://huggingface.co/bartowski/NeuralDaredevil-8B-abliterated-GGUF/resolve/main/NeuralDaredevil-8B-abliterated-Q4_K_M.gguf\",\"https://huggingface.co/bartowski/L3-8B-Lunaris-v1-GGUF/resolve/main/L3-8B-Lunaris-v1-Q4_K_M.gguf\",\"https://huggingface.co/mradermacher/L3-Umbral-Mind-RP-v2.0-8B-GGUF/resolve/main/L3-Umbral-Mind-RP-v2.0-8B.Q4_K_M.gguf\"]{allow-input: true}\r\n",
 "Layers = 99 #@param [99]{allow-input: true}\r\n",
 "ContextSize = 4096 #@param [4096,8192] {allow-input: true}\r\n",
 "FlashAttention = True #@param {type:\"boolean\"}\r\n",
@@ -294,5 +294,13 @@ extern "C"
         return output;
     }
 
+    last_logprobs_outputs last_logprobs()
+    {
+        last_logprobs_outputs output;
+        std::vector<TopPicksData> toppicks = gpttype_get_top_picks_data(); //copy top picks
+        output.count = 0;
+        return output;
+    }
+
 }
 
expose.h (7 changes)
@@ -118,6 +118,13 @@ struct token_count_outputs
     int count = 0;
     int * ids; //we'll just use shared memory for this one, bit of a hack
 };
+struct last_logprobs_outputs {
+    int count = 0;
+    char ** selected_token;
+    float * selected_logprob;
+    char * tokens[5];
+    float * logprobs[5];
+};
 struct sd_load_model_inputs
 {
     const char * model_filename = nullptr;
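A note on the new struct's array fields: char * tokens[5] and float * logprobs[5] declare inline arrays of five pointers, not pointers to arrays. A minimal ctypes sketch of that C layout, for illustration only (the binding this commit actually adds to the Python side appears further below and wraps each array in an extra POINTER):

import ctypes

# Illustrative ctypes mirror of last_logprobs_outputs as declared above.
# This is a sketch of the C layout, not the binding the commit itself adds.
class LastLogprobsSketch(ctypes.Structure):
    _fields_ = [("count", ctypes.c_int),
                ("selected_token", ctypes.POINTER(ctypes.c_char_p)),   # char ** selected_token
                ("selected_logprob", ctypes.POINTER(ctypes.c_float)),  # float * selected_logprob
                ("tokens", 5 * ctypes.c_char_p),                       # char * tokens[5]: 5 inline char pointers
                ("logprobs", 5 * ctypes.POINTER(ctypes.c_float))]      # float * logprobs[5]: 5 inline float pointers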
@@ -114,7 +114,7 @@ static std::vector<std::string> banned_phrases;
 static std::unordered_multimap<gpt_vocab::id, std::vector<gpt_vocab::id>> dry_sequence_breakers; // Multi-mapping from first token of sequence to tail of sequence (tail is empty for a single token)
 static std::vector<int> dry_repeat_count; // Indexed as last_n_tokens
 static std::unordered_map<gpt_vocab::id, int> dry_max_token_repeat;
-static std::vector<llama_token_data> top_picks;
+static std::vector<TopPicksData> top_picks_history;
 static int remaining_tokens = 0;
 static int stopper_unused_tokens = 0;
 static std::mutex concat_output_mtx;
@@ -587,7 +587,8 @@ llama_token sample_token(llama_token_data_array * candidates, std::mt19937 & rng
     sample_softmax(candidates);
     std::vector<float> probs;
     probs.reserve(candidates->size);
-    top_picks.clear();
+    TopPicksData newpick;
+
     for (size_t i = 0; i < candidates->size; ++i) {
         probs.push_back(candidates->data[i].p);
     }
@@ -595,18 +596,20 @@ llama_token sample_token(llama_token_data_array * candidates, std::mt19937 & rng
     std::discrete_distribution<> dist(probs.begin(), probs.end());
     int idx = dist(rng);
 
-    if(debugmode==1)
-    {
-        top_picks.push_back(candidates->data[idx]);
-        for (size_t i = 0; (i < candidates->size && i<4); ++i)
-        {
-            if(i!=idx)
-            {
-                top_picks.push_back(candidates->data[i]);
-            }
-        }
-    }
+    newpick.selected_token = FileFormatTokenizeID(candidates->data[idx].id, file_format, true);
+    newpick.selected_logprob = candidates->data[idx].logit;
+    newpick.selected_probability = candidates->data[idx].p;
+    newpick.selected_tokenid = candidates->data[idx].id;
+    for (size_t i = 0; (i < candidates->size && i<5); ++i)
+    {
+        newpick.tokens.push_back(FileFormatTokenizeID(candidates->data[i].id, file_format, true));
+        newpick.logprobs.push_back(candidates->data[i].logit);
+        newpick.p.push_back(candidates->data[i].p);
+        newpick.tokenid.push_back(candidates->data[i].id);
+    }
+
+    top_picks_history.push_back(newpick);
 
     llama_token result = candidates->data[idx].id;
     return result;
 }
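Net effect: candidate recording is no longer debug-only. Every call to sample_token now appends one TopPicksData record holding the sampled token (string, id, logit, probability) plus parallel vectors for the first five candidates, which after sample_softmax are the most probable ones; note that as committed the logprobs vectors store raw logits rather than normalized log-probabilities. A rough Python model of that bookkeeping, with hypothetical (token, id, logit, prob) tuples standing in for llama_token_data:

# Rough model of the new per-token bookkeeping (illustrative only).
# `candidates` stands in for the softmaxed, probability-sorted candidate array.
top_picks_history = []

def record_pick(candidates, idx):
    tok, tid, logit, prob = candidates[idx]
    newpick = {"selected_token": tok, "selected_tokenid": tid,
               "selected_logprob": logit, "selected_probability": prob,
               "tokens": [], "tokenid": [], "logprobs": [], "p": []}
    for tok, tid, logit, prob in candidates[:5]:   # mirrors (i < size && i < 5)
        newpick["tokens"].append(tok)
        newpick["tokenid"].append(tid)
        newpick["logprobs"].append(logit)          # raw logit, as in the C++ code
        newpick["p"].append(prob)
    top_picks_history.append(newpick)

record_pick([("Hello", 15043, 8.1, 0.62), (" world", 3186, 6.4, 0.12)], 0)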
@@ -2422,6 +2425,11 @@ const std::string & gpttype_get_pending_output()
     return concat_output_reader_copy_poll;
 }
 
+const std::vector<TopPicksData> gpttype_get_top_picks_data()
+{
+    return top_picks_history;
+}
+
 bool VecContainsIntVal(const std::vector<int> & vec, const int val)
 {
     for (const auto &matched : vec)
@@ -2484,6 +2492,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     dry_repeat_count.clear();
     dry_sequence_breakers.clear();
     dry_max_token_repeat.clear();
+    top_picks_history.clear();
 
     double time0 = 0, time1 = 0, time2 = 0;
     timer_start();
@@ -3271,20 +3280,25 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         {
             printf("\rGenerating (%d / %d tokens)", (kcpp_data->n_predict - remaining_tokens), kcpp_data->n_predict);
         }
-        if(debugmode==1 && top_picks.size()>0)
+        if(debugmode==1 && top_picks_history.size()>0)
         {
             printf(" [");
-            bool firstloop = true;
-            for (auto & pick : top_picks)
+            TopPicksData toppick = top_picks_history[top_picks_history.size()-1];
+            std::string topstr = toppick.selected_token;
+            ::utreplace(topstr, "\n", "\\n");
+            printf("(%s %.2f%%)", RemoveBell(topstr).c_str(), toppick.selected_probability*100);
+            int maxtoshow = (toppick.tokenid.size()>4?4:toppick.tokenid.size());
+            for (int i=0;i<maxtoshow;++i)
             {
-                if (!firstloop)
+                if(toppick.tokenid[i]==toppick.selected_tokenid)
                 {
-                    printf(" ");
+                    continue;
                 }
-                firstloop = false;
-                std::string tokenizedstr = FileFormatTokenizeID(pick.id, file_format, true);
+                printf(" ");
+                std::string tokenizedstr = toppick.tokens[i];
                 ::utreplace(tokenizedstr, "\n", "\\n");
-                printf("(%s %.2f%%)", RemoveBell(tokenizedstr).c_str(), pick.p*100);
+                printf("(%s %.2f%%)", RemoveBell(tokenizedstr).c_str(), toppick.p[i]*100);
             }
             printf("]\n");
         }
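Put together, the debug console output per token now prints the selected token first, then up to four alternatives, skipping the selected one when it appears among them. Assembled from the printf format strings above (the tokens and percentages here are made up), a line would look roughly like:

Generating (3 / 8 tokens) [(Hello 62.50%) ( world 12.25%) (! 8.10%) (, 5.01%)]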
@@ -101,6 +101,14 @@ class token_count_outputs(ctypes.Structure):
     _fields_ = [("count", ctypes.c_int),
                 ("ids", ctypes.POINTER(ctypes.c_int))]
 
+# returns top 5 logprobs per token
+class last_logprobs_outputs(ctypes.Structure):
+    _fields_ = [("count", ctypes.c_int),
+                ("selected_token", ctypes.POINTER(ctypes.c_char_p)),
+                ("selected_logprob", ctypes.POINTER(ctypes.c_float)),
+                ("tokens", ctypes.POINTER(5 * ctypes.c_char_p)),
+                ("logprobs", ctypes.POINTER(5 * ctypes.c_float))]
+
 class load_model_inputs(ctypes.Structure):
     _fields_ = [("threads", ctypes.c_int),
                 ("blasthreads", ctypes.c_int),
@@ -445,6 +453,7 @@ def init_library():
     handle.whisper_load_model.restype = ctypes.c_bool
     handle.whisper_generate.argtypes = [whisper_generation_inputs]
     handle.whisper_generate.restype = whisper_generation_outputs
+    handle.last_logprobs.restype = last_logprobs_outputs
 
 def set_backend_props(inputs):
     clblastids = 0
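With the restype registered, Python can call the export directly. In this WIP commit the C-side last_logprobs() still reports count = 0 and leaves the pointer fields unset, so the sketch below never enters its loop yet; it only illustrates, under that assumption, how the fields declared above would be read once populated:

# Hypothetical consumption of the new binding (the C side is still a stub
# here, so count stays 0 and the loop body never runs yet).
res = handle.last_logprobs()
for i in range(res.count):
    picked = res.selected_token[i].decode("utf-8", errors="ignore")
    print(f"token {picked!r} logprob={res.selected_logprob[i]:.4f}")
    alts, alt_lps = res.tokens[i], res.logprobs[i]  # i-th blocks of 5 entries
    for j in range(5):
        if alts[j] is not None:
            print(f"  alt {alts[j].decode('utf-8', 'ignore')!r} lp={alt_lps[j]:.4f}")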
@@ -69,6 +69,18 @@ struct FileFormatExtraMeta
     int n_expert_count = 0;
 };
 
+struct TopPicksData
+{
+    std::string selected_token;
+    int32_t selected_tokenid;
+    float selected_logprob;
+    float selected_probability;
+    std::vector<std::string> tokens;
+    std::vector<int> tokenid;
+    std::vector<float> logprobs;
+    std::vector<float> p;
+};
+
 enum ModelLoadResult
 {
     FAIL = 0,
@@ -81,6 +93,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs);
 bool gpttype_generate_abort();
 const std::string & gpttype_get_pending_output();
 std::vector<int> gpttype_get_token_arr(const std::string & input, bool addbos);
+const std::vector<TopPicksData> gpttype_get_top_picks_data();
 
 bool sdtype_load_model(const sd_load_model_inputs inputs);
 sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs);