mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 17:44:38 +00:00
* move Dynatemp changes to new branch * fix float header * Properly reintroduce variable expert count Controllable through experts.txt * first pass at DynaTemp UI Checkbox partial implemented, Min and Max Temp implemented * DynaTemp UI Checkbox Trigger DynaTemp on checkbox * DynaTemp UI checkbox edition Hell Yeah! DynaTemp! * Remove greedy dynatemp * Fix race condition caused by debug print * Fixed broken presets and miro Fixes broken presets and mirostat * Remove debug function + HHI temp Also removed unnecessary softmax double precision * Fix whitespace (?) for generate function * epic upstream renaming scheme fix * fix stupid indents * Other cleanup Reintroduce unused rep pen function, move temp functions first before entropy dynamic temp * Slight indent fix * revert batch pyinstaller maker to mainline and also delete experts.txt since adjustable routing is also being removed for the PR * compact dynatemp into a single value dynatemp_range. This is a float which represents the allowed deviation from the min and max temperature when using dynatemp. Thus, if we want a value of dynatemp_min=0.3, dynatemp_max=0.5, then we would simply set temperature=0.4 and dynatemp_range=0.1. Functionally dynatemp would operate the same, but it would simplify usage and make it a single easy to adjust value. --------- Co-authored-by: Alexander Abushady <aabushady214@gmail.com> Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com>
109 lines · 2.8 KiB · C++
#pragma once

#include <cstdint>
#include <string>
#include <vector>
|
|
|
|
// Fixed capacities for the C-style arrays exchanged across the API boundary.
// NOTE(review): these sizes appear to be mirrored on the caller's side
// (ctypes-style bindings) — keep them in sync if changed.
const int stop_token_max = 16;   // max custom stop sequences per request
const int ban_token_max = 16;    // max banned-token strings per model load
const int tensor_split_max = 16; // max entries in the GPU tensor-split table
const int logit_bias_max = 16;   // max (token, bias) pairs per request
|
|
// match kobold's sampler list and order
// Plain (non-class) enum on purpose: the numeric values are part of the
// external API — callers fill sampler_order[] with these integers, so the
// assigned values must never change.
enum samplers
{
    KCPP_SAMPLER_TOP_K=0,
    KCPP_SAMPLER_TOP_A=1,
    KCPP_SAMPLER_TOP_P=2,
    KCPP_SAMPLER_TFS=3,
    KCPP_SAMPLER_TYP=4,
    KCPP_SAMPLER_TEMP=5,
    KCPP_SAMPLER_REP_PEN=6,
    KCPP_SAMPLER_MAX    // count sentinel; also sizes sampler_order[]
};
|
|
// Why the last generation stopped; exposed to the caller via
// last_stop_reason. Values are part of the external API — do not renumber.
enum stop_reason
{
    INVALID=-1,        // no generation has completed / state unknown
    OUT_OF_TOKENS=0,   // exhausted the requested max_length budget
    EOS_TOKEN=1,       // model emitted an end-of-stream token
    CUSTOM_STOPPER=2,  // matched one of the caller's stop_sequence[] strings
};
|
|
// One (token, bias) pair: `bias` is added to that token's logit before
// sampling. An array of these (logit_biases) arrives with each request.
struct logit_bias {
    int32_t token_id;
    float bias;
};
|
|
struct load_model_inputs
|
|
{
|
|
const int threads;
|
|
const int blasthreads;
|
|
const int max_context_length;
|
|
const bool low_vram;
|
|
const bool use_mmq;
|
|
const char * executable_path;
|
|
const char * model_filename;
|
|
const char * lora_filename;
|
|
const char * lora_base;
|
|
const bool use_mmap;
|
|
const bool use_mlock;
|
|
const bool use_smartcontext;
|
|
const bool use_contextshift;
|
|
const int clblast_info = 0;
|
|
const int cublas_info = 0;
|
|
const int blasbatchsize = 512;
|
|
const int debugmode = 0;
|
|
const int forceversion = 0;
|
|
const int gpulayers = 0;
|
|
const float rope_freq_scale = 1.0f;
|
|
const float rope_freq_base = 10000.0f;
|
|
const char * banned_tokens[ban_token_max];
|
|
const float tensor_split[tensor_split_max];
|
|
};
|
|
struct generation_inputs
|
|
{
|
|
const int seed;
|
|
const char * prompt;
|
|
const char * memory;
|
|
const int max_context_length;
|
|
const int max_length;
|
|
const float temperature;
|
|
const int top_k;
|
|
const float top_a = 0.0f;
|
|
const float top_p;
|
|
const float min_p = 0.0f;
|
|
const float typical_p;
|
|
const float tfs;
|
|
const float rep_pen;
|
|
const int rep_pen_range;
|
|
const float presence_penalty = 0.0f;
|
|
const int mirostat = 0;
|
|
const float mirostat_eta;
|
|
const float mirostat_tau;
|
|
const samplers sampler_order[KCPP_SAMPLER_MAX];
|
|
const int sampler_len;
|
|
const bool unban_tokens_rt;
|
|
const char * stop_sequence[stop_token_max];
|
|
const bool stream_sse;
|
|
const char * grammar;
|
|
const bool grammar_retain_state;
|
|
const bool quiet = false;
|
|
const float dynatemp_range = 0.0f;
|
|
const logit_bias logit_biases[logit_bias_max];
|
|
|
|
};
|
|
// Result of a generation request returned to the caller.
struct generation_outputs
{
    int status = -1;     // -1 = not yet run / failed; set by the generator on completion
    char text[32768];    //32kb should be enough for any response
};
|
|
// Result of a tokenize/count request.
struct token_count_outputs
{
    int count = 0;  // number of tokens produced
    int * ids;      //we'll just use shared memory for this one, bit of a hack
};
|
|
|
|
extern std::string executable_path;
|
|
extern std::string lora_filename;
|
|
extern std::string lora_base;
|
|
extern std::vector<std::string> generated_tokens;
|
|
extern bool generation_finished;
|
|
extern float last_eval_time;
|
|
extern float last_process_time;
|
|
extern int last_token_count;
|
|
extern int last_seed;
|
|
extern int total_gens;
|
|
extern stop_reason last_stop_reason;
|