mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 01:41:37 +00:00
* fix corner case in sd_oai_transform_params Also fix typo in the function name. * support for customizing loaded LoRA multipliers The `sdloramult` flag now accepts a list of multipliers, one for each LoRA. If all multipliers are non-zero, LoRAs load as before, with no extra VRAM usage or performance impact. If any LoRA has a multiplier of 0, we switch to `at_runtime` mode, and these LoRAs will be available to multiplier changes via the `lora` sdapi field and show up in the `sdapi/v1/loras` endpoint. All LoRAs are still preloaded on startup, and cached to avoid file reloads. If the list of multipliers is shorter than the list of LoRAs, the multiplier list is extended with the first multiplier (1.0 by default), to keep it compatible with the previous behavior. * support for `<lora:name:multiplier>` prompt syntax and metadata * add a few tests for sanitize_lora_multipliers
379 lines
11 KiB
C++
379 lines
11 KiB
C++
#pragma once

#include <cstdint>
#include <string>
#include <vector>

// Compile-time size limits. Several of these are used as fixed array bounds
// by the structs declared later in this header.
constexpr int tensor_split_max = 16; // length of tensor_split / draft_gpusplit
constexpr int images_max = 8;        // length of generation_inputs::images
constexpr int audio_max = 4;         // length of generation_inputs::audio
constexpr int logprobs_max = 10;     // length of logprob_item::tokens / token_ids
constexpr int overridekv_max = 16;   // length of load_model_inputs::override_kv

// Sampler identifiers. The numeric values must match kobold's sampler list
// and order, so they are spelled out explicitly.
enum samplers
{
    KCPP_SAMPLER_TOP_K = 0,
    KCPP_SAMPLER_TOP_A = 1,
    KCPP_SAMPLER_TOP_P = 2,
    KCPP_SAMPLER_TFS = 3,
    KCPP_SAMPLER_TYP = 4,
    KCPP_SAMPLER_TEMP = 5,
    KCPP_SAMPLER_REP_PEN = 6,
    KCPP_SAMPLER_MAX // number of samplers above; used as an array bound
};

// Reason codes for why a generation stopped. Negative values are the
// error / not-set states; INVALID is the default stored in
// generation_outputs::stopreason.
enum stop_reason
{
    ERROR_ENCOUNTERED = -2,
    INVALID = -1,
    OUT_OF_TOKENS = 0,
    EOS_TOKEN_HIT = 1,
    CUSTOM_STOPPER = 2,
};

// One (token id, bias) pair. generation_inputs::logit_biases points at an
// array of these, with its length in logit_biases_len.
// Deliberately left without default initializers so the type stays trivially
// constructible.
struct logit_bias {
    int32_t token_id;
    float bias;
};

struct load_model_inputs
|
|
{
|
|
const int threads = 0;
|
|
const int blasthreads = 0;
|
|
const int max_context_length = 0;
|
|
const bool low_vram = 0;
|
|
const bool use_mmq = 0;
|
|
const bool use_rowsplit = 0;
|
|
const char * executable_path = nullptr;
|
|
const char * model_filename = nullptr;
|
|
const char * lora_filename = nullptr;
|
|
const char * draftmodel_filename = nullptr;
|
|
const int draft_amount = 8;
|
|
const int draft_gpulayers = 999;
|
|
const float draft_gpusplit[tensor_split_max] = {};
|
|
const char * mmproj_filename = nullptr;
|
|
const bool mmproj_cpu = false;
|
|
const int visionmaxres = 2048;
|
|
const bool use_mmap = false;
|
|
const bool use_mlock = false;
|
|
const bool use_smartcontext = false;
|
|
const bool use_contextshift = false;
|
|
const bool use_fastforward = false;
|
|
const int kcpp_main_gpu = 0;
|
|
const char * vulkan_info = nullptr;
|
|
const int batchsize = 512;
|
|
const bool autofit = false;
|
|
const int autofit_tax_mb = 0;
|
|
const int gpulayers = 0;
|
|
const float rope_freq_scale = 1.0f;
|
|
const float rope_freq_base = 10000.0f;
|
|
const int overridenativecontext = 0;
|
|
const int moe_experts = -1;
|
|
const int moecpu = 0;
|
|
const bool no_bos_token = false;
|
|
const bool load_guidance = false;
|
|
const char * override_kv[overridekv_max] = {};
|
|
const char * override_tensors = nullptr;
|
|
const bool flash_attention = false;
|
|
const float tensor_split[tensor_split_max] = {};
|
|
const int quant_k = 0;
|
|
const int quant_v = 0;
|
|
const bool check_slowness = false;
|
|
const bool highpriority = false;
|
|
const bool swa_support = false;
|
|
const bool smartcache = false;
|
|
const int smartcacheslots = 0;
|
|
const bool pipelineparallel = false;
|
|
const float lora_multiplier = 1.0f;
|
|
const char * devices_override = nullptr;
|
|
const bool quiet = false;
|
|
const int debugmode = 0;
|
|
};
|
|
struct generation_inputs
|
|
{
|
|
const int seed = 0;
|
|
const char * prompt = nullptr;
|
|
const char * memory = nullptr;
|
|
const char * negative_prompt = nullptr;
|
|
const float guidance_scale = 1;
|
|
const char * images[images_max] = {};
|
|
const char * audio[audio_max] = {};
|
|
const int max_context_length = 0;
|
|
const int max_length = 0;
|
|
const float temperature = 0.0f;
|
|
const int top_k = 0;
|
|
const float top_a = 0.0f;
|
|
const float top_p = 0.0f;
|
|
const float min_p = 0.0f;
|
|
const float typical_p = 0;
|
|
const float tfs = 0;
|
|
const float nsigma = 0.0f;
|
|
const float rep_pen = 0;
|
|
const int rep_pen_range = 0;
|
|
const float rep_pen_slope = 1.0f;
|
|
const float presence_penalty = 0.0f;
|
|
const int mirostat = 0;
|
|
const float mirostat_tau = 0.0f;
|
|
const float mirostat_eta = 0.0f;
|
|
const float xtc_threshold = 0.0f;
|
|
const float xtc_probability = 0.0f;
|
|
const samplers sampler_order[KCPP_SAMPLER_MAX] = {};
|
|
const int sampler_len = 0;
|
|
const bool allow_eos_token = false;
|
|
const bool bypass_eos_token = false;
|
|
const bool tool_call_fix = false; //this prevents close square bracket ] from being generated early.
|
|
const bool render_special = false;
|
|
const bool stream_sse = false;
|
|
const char * grammar = nullptr;
|
|
const bool grammar_retain_state = false;
|
|
const float dynatemp_range = 0.0f;
|
|
const float dynatemp_exponent = 1.0f;
|
|
const float smoothing_factor = 0.0f;
|
|
const float smoothing_curve = 1.0f;
|
|
const float adaptive_target = -1.0f;
|
|
const float adaptive_decay = 0.9f;
|
|
const float dry_multiplier = 0.0f;
|
|
const float dry_base = 0.0f;
|
|
const int dry_allowed_length = 0;
|
|
const int dry_penalty_last_n = 0;
|
|
const int dry_sequence_breakers_len = 0;
|
|
const char ** dry_sequence_breakers = nullptr;
|
|
const int stop_sequence_len = 0;
|
|
const char ** stop_sequence = nullptr;
|
|
const int logit_biases_len = 0;
|
|
const logit_bias * logit_biases = nullptr;
|
|
const int banned_tokens_len = 0;
|
|
const char ** banned_tokens = nullptr;
|
|
};
|
|
struct generation_outputs
|
|
{
|
|
int status = -1;
|
|
int stopreason = stop_reason::INVALID;
|
|
int prompt_tokens = 0;
|
|
int completion_tokens = 0;
|
|
const char * text; //response will now be stored in c++ allocated memory
|
|
};
|
|
// Result of a token count request: `count` token ids reachable through `ids`.
// `ids` previously had no initializer, so a default-initialized instance
// carried an indeterminate pointer; it now starts out null.
struct token_count_outputs
{
    int count = 0;
    int * ids = nullptr; // we'll just use shared memory for this one, bit of a hack
};

struct logprob_item {
|
|
int option_count;
|
|
const char * selected_token;
|
|
float selected_logprob;
|
|
int32_t selected_token_id;
|
|
const char * tokens[logprobs_max];
|
|
int32_t token_ids[logprobs_max];
|
|
float * logprobs = nullptr;
|
|
};
|
|
struct last_logprobs_outputs {
|
|
int count = 0;
|
|
logprob_item * logprob_items = nullptr;
|
|
};
|
|
|
|
// Settings supplied when loading the `sd` model and its auxiliary files
// (t5xxl, clip, vae, LoRAs, photomaker, upscaler). Every member is const and
// carries an in-class default.
// `lora_filenames` and `lora_multipliers` are pointer fields accompanied by
// the `lora_len` count.
// NOTE(review): field order and types are kept exactly as they were; this
// looks like a layout shared with an external binding - confirm before
// reordering or resizing anything.
struct sd_load_model_inputs
{
    const char * model_filename = nullptr;
    const char * executable_path = nullptr;
    const int kcpp_main_gpu = 0;
    const char * vulkan_info = nullptr;
    const int threads = 0;
    const int quant = 0;
    const bool flash_attention = false;
    const bool offload_cpu = false;
    const bool vae_cpu = false;
    const bool clip_cpu = false;
    const bool diffusion_conv_direct = false;
    const bool vae_conv_direct = false;
    const bool taesd = false;
    const int tiled_vae_threshold = 0;
    const char * t5xxl_filename = nullptr;
    const char * clip1_filename = nullptr;
    const char * clip2_filename = nullptr;
    const char * vae_filename = nullptr;
    const int lora_len = 0;
    const char ** lora_filenames = nullptr;
    const float * lora_multipliers = nullptr;
    const int lora_apply_mode = 0;
    const char * photomaker_filename = nullptr;
    const char * upscaler_filename = nullptr;
    const int img_hard_limit = 0;
    const int img_soft_limit = 0;
    const char * devices_override = nullptr;
    const bool quiet = false;
    const int debugmode = 0;
};

// Per-request settings for an `sd` generation. Every member is const and
// carries an in-class default. `init_images` and `mask` default to an empty
// string rather than null.
// NOTE(review): field order and types are kept exactly as they were; this
// looks like a layout shared with an external binding - confirm before
// reordering or resizing anything.
struct sd_generation_inputs
{
    const char * prompt = nullptr;
    const char * negative_prompt = nullptr;
    const char * init_images = "";
    const char * mask = "";
    const int extra_images_len = 0;
    const char ** extra_images = nullptr;
    const bool flip_mask = false;
    const float denoising_strength = 0.0f;
    const float cfg_scale = 0.0f;
    const float distilled_guidance = -1.0f;
    const int shifted_timestep = 0;
    const float flow_shift = 0.0f;
    const int sample_steps = 0;
    const int width = 0;
    const int height = 0;
    const int seed = 0;
    const char * sample_method = nullptr;
    const char * scheduler = nullptr;
    const int clip_skip = -1;
    const int vid_req_frames = 1;
    const int video_output_type = 0; // 0=gif, 1=avi, 2=both
    const bool remove_limits = false;
    const bool circular_x = false;
    const bool circular_y = false;
    const bool upscale = false;
    const int lora_len = 0;
    const float * lora_multipliers = nullptr;
};

// Result of an `sd` generation request. `status` starts at -1 and both
// string fields start as empty strings rather than null.
struct sd_generation_outputs
{
    int status = -1;
    int animated = 0;
    const char * data = "";
    const char * data_extra = "";
};

// Inputs for an upscale request. `init_images` defaults to an empty string
// rather than null.
struct sd_upscale_inputs
{
    const char * init_images = "";
    const int upscaling_resize = 0;
};

// Status code plus a string payload. `status` starts at -1 and `data`
// starts as an empty string rather than null.
struct sd_info_outputs
{
    int status = -1;
    const char * data = "";
};

// Settings supplied when loading the whisper model. Every member is const
// and carries an in-class default.
// NOTE(review): field order and types are kept exactly as they were;
// presumably mirrored by an external binding - confirm before reordering.
struct whisper_load_model_inputs
{
    const char * model_filename = nullptr;
    const char * executable_path = nullptr;
    const int kcpp_main_gpu = 0;
    const char * vulkan_info = nullptr;
    const char * devices_override = nullptr;
    const bool quiet = false;
    const int debugmode = 0;
};

// Per-request inputs for whisper. All string fields default to null.
// NOTE(review): field order and types are kept exactly as they were;
// presumably mirrored by an external binding - confirm before reordering.
struct whisper_generation_inputs
{
    const char * prompt = nullptr;
    const char * audio_data = nullptr;
    const bool suppress_non_speech = false;
    const char * langcode = nullptr;
};

// Result of a whisper request. `status` starts at -1 and `text` starts as
// an empty string rather than null.
struct whisper_generation_outputs
{
    int status = -1;
    const char * text = "";
};

// Settings supplied when loading the TTS models (two model files: `ttc` and
// `cts`). Every member is const and carries an in-class default; note that
// `threads` defaults to 4 and `ttsmaxlen` to 4096.
// NOTE(review): field order and types are kept exactly as they were;
// presumably mirrored by an external binding - confirm before reordering.
struct tts_load_model_inputs
{
    const int threads = 4;
    const char * ttc_model_filename = nullptr;
    const char * cts_model_filename = nullptr;
    const char * executable_path = nullptr;
    const int kcpp_main_gpu = 0;
    const char * vulkan_info = nullptr;
    const int gpulayers = 0;
    const bool flash_attention = false;
    const int ttsmaxlen = 4096;
    const char * devices_override = nullptr;
    const bool quiet = false;
    const int debugmode = 0;
};

// Per-request inputs for TTS. `prompt` defaults to null; the custom speaker
// and reference audio strings default to empty strings.
// NOTE(review): field order and types are kept exactly as they were;
// presumably mirrored by an external binding - confirm before reordering.
struct tts_generation_inputs
{
    const char * prompt = nullptr;
    const int speaker_seed = 0;
    const int audio_seed = 0;
    const char * custom_speaker_voice = "";
    const char * custom_speaker_text = "";
    const char * custom_speaker_data = "";
    const char * reference_audio = "";
};

// Result of a TTS request. `status` starts at -1 and `data` starts as an
// empty string rather than null.
struct tts_generation_outputs
{
    int status = -1;
    const char * data = "";
};

// Settings supplied when loading the embeddings model. Every member is const
// and carries an in-class default; note that `threads` defaults to 4.
// NOTE(review): field order and types are kept exactly as they were;
// presumably mirrored by an external binding - confirm before reordering.
struct embeddings_load_model_inputs
{
    const int threads = 4;
    const char * model_filename = nullptr;
    const char * executable_path = nullptr;
    const int kcpp_main_gpu = 0;
    const char * vulkan_info = nullptr;
    const int gpulayers = 0;
    const bool flash_attention = false;
    const bool use_mmap = false;
    const int embeddingsmaxctx = 0;
    const char * devices_override = nullptr;
    const bool quiet = false;
    const int debugmode = 0;
};

// Per-request inputs for embeddings. `truncate` is the one boolean in this
// header that defaults to true.
struct embeddings_generation_inputs
{
    const char * prompt = nullptr;
    const bool truncate = true;
};

// Result of an embeddings request. `status` starts at -1, `count` at 0 and
// `data` as an empty string rather than null.
struct embeddings_generation_outputs
{
    int status = -1;
    int count = 0;
    const char * data = "";
};

// Settings supplied when loading the music models (four model files: llm,
// embedding, diffusion and vae). Every member is const and carries an
// in-class default.
// NOTE(review): field order and types are kept exactly as they were;
// presumably mirrored by an external binding - confirm before reordering.
struct music_load_model_inputs
{
    const char * musicllm_filename = nullptr;
    const char * musicembedding_filename = nullptr;
    const char * musicdiffusion_filename = nullptr;
    const char * musicvae_filename = nullptr;
    const bool lowvram = false;
    const char * executable_path = nullptr;
    const int kcpp_main_gpu = 0;
    const char * vulkan_info = nullptr;
    const char * devices_override = nullptr;
    const bool quiet = false;
    const int debugmode = 0;
};

// Per-request inputs for music generation. The request itself is passed as
// a string in `input_json`.
// NOTE(review): field order and types are kept exactly as they were;
// presumably mirrored by an external binding - confirm before reordering.
struct music_generation_inputs
{
    const bool is_planner_mode = false; // if true, generate codes, else, generate diffusion music
    const bool stereo = false;
    const bool gen_codes = false;
    const char * input_json = nullptr;
};

// Result of a music generation request. `status` starts at -1 and both
// string fields start as empty strings rather than null.
struct music_generation_outputs
{
    int status = -1;
    const char * music_output_json = "";
    const char * data = "";
};

extern std::string executable_path;
|
|
extern std::string lora_filename;
|
|
extern std::string mmproj_filename;
|
|
extern std::string draftmodel_filename;
|
|
extern std::vector<std::string> generated_tokens;
|
|
extern bool generation_finished;
|
|
extern bool audio_multimodal_supported;
|
|
extern bool vision_multimodal_supported;
|
|
extern float last_eval_time;
|
|
extern float last_process_time;
|
|
extern int last_token_count;
|
|
extern int last_input_count;
|
|
extern int last_seed;
|
|
extern int total_gens;
|
|
extern int total_img_gens;
|
|
extern int total_tts_gens;
|
|
extern int total_transcribe_gens;
|
|
extern int last_draft_success;
|
|
extern int last_draft_failed;
|
|
extern stop_reason last_stop_reason;
|