mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
improved EOT handling
This commit is contained in:
parent
d5d5dda02b
commit
4b664b3409
2 changed files with 29 additions and 13 deletions
|
@ -1576,6 +1576,18 @@ const std::string & gpttype_get_pending_output()
|
|||
return concat_output_reader_copy_poll;
|
||||
}
|
||||
|
||||
// Reports whether `val` occurs anywhere in `vec`.
// Performs a front-to-back linear scan that stops at the first match;
// an empty vector always yields false.
bool VecContainsIntVal(const std::vector<int> & vec, const int val)
{
    bool found = false;
    for (auto it = vec.begin(); it != vec.end() && !found; ++it)
    {
        // Loop condition ends the scan as soon as a match is recorded.
        found = (*it == val);
    }
    return found;
}
|
||||
|
||||
int GetThreadsToUse(bool blasmode)
|
||||
{
|
||||
if (blasmode)
|
||||
|
@ -2262,9 +2274,13 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
// decrement remaining sampling budget
|
||||
--remaining_tokens;
|
||||
|
||||
for (auto id : embd)
|
||||
for (auto eid : embd)
|
||||
{
|
||||
std::string tokenizedstr = FileFormatTokenizeID(id, file_format, inputs.render_special);
|
||||
std::string tokenizedstr = FileFormatTokenizeID(eid, file_format, inputs.render_special);
|
||||
if(!inputs.render_special && (eid==eosID || (eid==eotID && eid!=-1) || VecContainsIntVal(special_stop_sequence,id))) //extra filter to avoid unwanted special tokens
|
||||
{
|
||||
tokenizedstr = ""; //prevent render
|
||||
}
|
||||
if(stream_sse)
|
||||
{
|
||||
generated_tokens.push_back(tokenizedstr);
|
||||
|
@ -2302,7 +2318,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
stopper_unused_tokens = remaining_tokens;
|
||||
if(allow_regular_prints)
|
||||
{
|
||||
printf("\n(EOS token triggered!)");
|
||||
printf("\n(EOS token triggered! ID:%d)",id);
|
||||
}
|
||||
remaining_tokens = 0;
|
||||
last_stop_reason = stop_reason::EOS_TOKEN_HIT;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue