mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Merge branch 'master' into concedo_experimental
# Conflicts: # .devops/tools.sh # README.md
This commit is contained in:
commit
95bbd46019
8 changed files with 757 additions and 36 deletions
15
llama.cpp
15
llama.cpp
|
@ -491,6 +491,8 @@ struct llama_file_loader {
|
|||
case GGML_TYPE_Q4_1:
|
||||
case GGML_TYPE_Q4_2:
|
||||
case GGML_TYPE_Q4_3:
|
||||
case GGML_TYPE_Q5_0:
|
||||
case GGML_TYPE_Q5_1:
|
||||
case GGML_TYPE_Q8_0:
|
||||
break;
|
||||
default: {
|
||||
|
@ -566,6 +568,8 @@ struct llama_file_saver {
|
|||
case GGML_TYPE_Q4_1:
|
||||
case GGML_TYPE_Q4_2:
|
||||
case GGML_TYPE_Q4_3:
|
||||
case GGML_TYPE_Q5_0:
|
||||
case GGML_TYPE_Q5_1:
|
||||
case GGML_TYPE_Q8_0:
|
||||
break;
|
||||
default: LLAMA_ASSERT(false);
|
||||
|
@ -857,6 +861,8 @@ static const char *llama_ftype_name(enum llama_ftype ftype) {
|
|||
return "mostly Q4_1, some F16";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_2: return "mostly Q4_2";
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_3: return "mostly Q4_3";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_0: return "mostly Q5_0";
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_1: return "mostly Q5_1";
|
||||
case LLAMA_FTYPE_MOSTLY_Q8_0: return "mostly Q8_0";
|
||||
default: return "unknown, may not work";
|
||||
}
|
||||
|
@ -1595,6 +1601,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||
case LLAMA_FTYPE_MOSTLY_Q4_1: quantized_type = GGML_TYPE_Q4_1; break;
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_2: quantized_type = GGML_TYPE_Q4_2; break;
|
||||
case LLAMA_FTYPE_MOSTLY_Q4_3: quantized_type = GGML_TYPE_Q4_3; break;
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_0: quantized_type = GGML_TYPE_Q5_0; break;
|
||||
case LLAMA_FTYPE_MOSTLY_Q5_1: quantized_type = GGML_TYPE_Q5_1; break;
|
||||
case LLAMA_FTYPE_MOSTLY_Q8_0: quantized_type = GGML_TYPE_Q8_0; break;
|
||||
default: throw format("invalid output file type %d\n", ftype);
|
||||
};
|
||||
|
@ -2089,6 +2097,13 @@ int llama_get_kv_cache_token_count(struct llama_context * ctx) {
|
|||
|
||||
#define LLAMA_MAX_RNG_STATE 64*1024
|
||||
|
||||
void llama_set_rng_seed(struct llama_context * ctx, int seed) {
|
||||
if (seed <= 0) {
|
||||
seed = time(NULL);
|
||||
}
|
||||
ctx->rng.seed(seed);
|
||||
}
|
||||
|
||||
// Returns the size of the state
|
||||
size_t llama_get_state_size(struct llama_context * ctx) {
|
||||
// we don't know size of rng until we actually serialize it. so reserve more than enough memory for its serialized state.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue