mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-12 14:11:27 +00:00
Merge remote-tracking branch 'origin/master' into opencl-dev
This commit is contained in:
commit
fb638fa817
18 changed files with 785 additions and 435 deletions
23
llama.cpp
23
llama.cpp
|
|
@ -408,6 +408,7 @@ enum llama_file_version {
|
|||
LLAMA_FILE_VERSION_GGMF_V1, // added version field and scores in vocab
|
||||
LLAMA_FILE_VERSION_GGJT_V1, // added padding
|
||||
LLAMA_FILE_VERSION_GGJT_V2, // changed quantization format
|
||||
LLAMA_FILE_VERSION_GGJT_V3, // changed Q4 and Q8 quantization format
|
||||
};
|
||||
|
||||
struct llama_file_loader {
|
||||
|
|
@ -440,6 +441,8 @@ struct llama_file_loader {
|
|||
file_version = LLAMA_FILE_VERSION_GGJT_V1;
|
||||
} else if (magic == 'ggjt' && version == 2) {
|
||||
file_version = LLAMA_FILE_VERSION_GGJT_V2;
|
||||
} else if (magic == 'ggjt' && version == 3) {
|
||||
file_version = LLAMA_FILE_VERSION_GGJT_V3;
|
||||
} else {
|
||||
throw format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
|
||||
magic, version);
|
||||
|
|
@ -814,10 +817,9 @@ static bool kv_cache_init(
|
|||
struct llama_context_params llama_context_default_params() {
|
||||
struct llama_context_params result = {
|
||||
/*.n_ctx =*/ 512,
|
||||
/*.n_parts =*/ -1,
|
||||
/*.gpu_layers =*/ 0,
|
||||
/*.seed =*/ -1,
|
||||
/*.f16_kv =*/ false,
|
||||
/*.f16_kv =*/ true,
|
||||
/*.logits_all =*/ false,
|
||||
/*.vocab_only =*/ false,
|
||||
/*.use_mmap =*/ true,
|
||||
|
|
@ -847,7 +849,8 @@ static const char *llama_file_version_name(llama_file_version version) {
|
|||
case LLAMA_FILE_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
|
||||
case LLAMA_FILE_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
|
||||
case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (pre #1405)";
|
||||
case LLAMA_FILE_VERSION_GGJT_V2: return "ggjt v2 (latest)";
|
||||
case LLAMA_FILE_VERSION_GGJT_V2: return "ggjt v2 (pre #1508)";
|
||||
case LLAMA_FILE_VERSION_GGJT_V3: return "ggjt v3 (latest)";
|
||||
}
|
||||
|
||||
return "unknown";
|
||||
|
|
@ -927,11 +930,19 @@ static void llama_model_load_internal(
|
|||
fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
|
||||
}
|
||||
|
||||
if (file_version != LLAMA_FILE_VERSION_GGJT_V2) {
|
||||
if (file_version < LLAMA_FILE_VERSION_GGJT_V2) {
|
||||
if (hparams.ftype != LLAMA_FTYPE_ALL_F32 &&
|
||||
hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 &&
|
||||
hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
|
||||
throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1305)");
|
||||
throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)");
|
||||
}
|
||||
}
|
||||
|
||||
if (file_version < LLAMA_FILE_VERSION_GGJT_V3) {
|
||||
if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
|
||||
hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
|
||||
hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
|
||||
throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -944,7 +955,7 @@ static void llama_model_load_internal(
|
|||
size_t ctx_size;
|
||||
size_t mmapped_size;
|
||||
ml->calc_sizes(&ctx_size, &mmapped_size);
|
||||
fprintf(stderr, "%s: ggml ctx size = %6.2f KB\n", __func__, ctx_size/1024.0);
|
||||
fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/1024.0/1024.0);
|
||||
|
||||
// print memory requirements
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue