Fixes to stopper tokens, fixed BLAS mode for GPT2 and GPTJ, updated Kobold Lite

Concedo 2023-04-16 21:54:18 +08:00
parent 6548d3b3fb
commit c757fbee1d
6 changed files with 17 additions and 14 deletions

@@ -240,13 +240,13 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
 // decrement remaining sampling budget
 --remaining_tokens;
 //printf("\nid:%d word:%s\n",id,llama_token_to_str(ctx, id));
-concat_output += llama_token_to_str(ctx, id);
+concat_output += llama_token_to_str(ctx, id);
 for (const auto &matched : stop_sequence)
 {
     if (concat_output.find(matched) != std::string::npos)
     {
         remaining_tokens = 0;
-        printf("\n(Stop sequence triggered)");
+        printf("\n(Stop sequence triggered: %s)",matched.c_str());
         break;
     }
 }
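
The changed lines accumulate each decoded token into concat_output and scan that buffer for any registered stop sequence; the fix also makes the log line report which sequence actually fired. Below is a minimal standalone C++ sketch of that check. The variable names mirror the diff, but the stop strings, the fake token stream, and the main() harness are illustrative assumptions, not part of the commit.

// Minimal standalone sketch of the stop-sequence check shown in the diff above.
// concat_output, stop_sequence and remaining_tokens mirror the names in
// llama_generate; the streamed token strings below are illustrative only.
#include <cstdio>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> stop_sequence = {"\nUser:", "</s>"};   // hypothetical stop strings
    std::string concat_output;
    int remaining_tokens = 8;                                       // sampling budget
    // Pretend these strings are tokens streamed from the model.
    std::vector<std::string> streamed = {"Hello", " there", ".", "\nUser:", " ignored"};

    for (const auto &tok : streamed)
    {
        if (remaining_tokens <= 0)
            break;
        --remaining_tokens;
        concat_output += tok;   // same accumulation as concat_output += llama_token_to_str(ctx, id)

        for (const auto &matched : stop_sequence)
        {
            // Substring search over everything generated so far; once any stop
            // sequence appears, zero the budget and report which one fired.
            if (concat_output.find(matched) != std::string::npos)
            {
                remaining_tokens = 0;
                printf("\n(Stop sequence triggered: %s)", matched.c_str());
                break;
            }
        }
    }
    printf("\nFinal output: %s\n", concat_output.c_str());
    return 0;
}

In this sketch the stop sequence zeroes the budget as soon as it appears in the accumulated output, so the trailing " ignored" token is never appended; printing the matched string is useful when several stop sequences are registered and it is unclear which one ended generation.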