Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-13 02:19:41 +00:00)
Interim merge. Do not use.
Merge branch 'master' into concedo_experimental

# Conflicts:
#	README.md
#	SHA256SUMS
#	examples/quantize/quantize.cpp
#	ggml-opencl.c
#	ggml.c
#	ggml.h
#	llama.cpp
#	llama.h
commit e9caff1cda
18 changed files with 14233 additions and 1923 deletions
llama.cpp (25 changes)
@@ -402,6 +402,7 @@ enum llama_file_version {
     LLAMA_FILE_VERSION_GGML,
     LLAMA_FILE_VERSION_GGMF_V1, // added version field and scores in vocab
     LLAMA_FILE_VERSION_GGJT_V1, // added padding
+    LLAMA_FILE_VERSION_GGJT_V2, // changed quantization format
 };
 
 struct llama_file_loader {
@@ -432,6 +433,8 @@ struct llama_file_loader {
             file_version = LLAMA_FILE_VERSION_GGMF_V1;
         } else if (magic == 'ggjt' && version == 1) {
             file_version = LLAMA_FILE_VERSION_GGJT_V1;
+        } else if (magic == 'ggjt' && version == 2) {
+            file_version = LLAMA_FILE_VERSION_GGJT_V2;
         } else {
             throw format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
                          magic, version);
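Note (annotation, not part of the diff): a minimal standalone sketch of the version detection above, assuming the GGJT-family header is two little-endian u32 fields, magic then version; detect_version is a hypothetical helper, not a function in llama.cpp, and the 'ggml'/'ggmf' branches are inferred from the enum comments.

    // version_detect_sketch.cpp -- sketch of the loader's (magic, version) mapping.
    #include <cstdint>
    #include <cstdio>

    enum llama_file_version {
        LLAMA_FILE_VERSION_GGML,
        LLAMA_FILE_VERSION_GGMF_V1,
        LLAMA_FILE_VERSION_GGJT_V1,
        LLAMA_FILE_VERSION_GGJT_V2, // changed quantization format
    };

    // Returns false for an unknown (magic, version) pair instead of throwing.
    static bool detect_version(uint32_t magic, uint32_t version, llama_file_version & out) {
        if (magic == 'ggml')                 { out = LLAMA_FILE_VERSION_GGML;    return true; } // versionless format
        if (magic == 'ggmf' && version == 1) { out = LLAMA_FILE_VERSION_GGMF_V1; return true; }
        if (magic == 'ggjt' && version == 1) { out = LLAMA_FILE_VERSION_GGJT_V1; return true; }
        if (magic == 'ggjt' && version == 2) { out = LLAMA_FILE_VERSION_GGJT_V2; return true; }
        return false;
    }

    int main() {
        llama_file_version v;
        if (detect_version('ggjt', 2, v)) {
            printf("file version enum = %d\n", (int) v); // prints 3 (GGJT_V2)
        }
        return 0;
    }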
@@ -535,8 +538,8 @@ struct llama_file_saver {
         write_vocab();
     }
     void write_magic() {
-        file.write_u32('ggjt'); // magic
-        file.write_u32(1); // version
+        file.write_u32(LLAMA_FILE_MAGIC);   // magic
+        file.write_u32(LLAMA_FILE_VERSION); // version
     }
     void write_hparams(enum llama_ftype new_ftype) {
         const llama_hparams & hparams = any_file_loader->hparams;
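Note (annotation, not part of the diff): write_magic() now goes through the shared constants, so the saver always stamps the current format instead of a hard-coded v1 header. A sketch of what that presumably expands to, assuming LLAMA_FILE_MAGIC is still the multi-character literal 'ggjt' and LLAMA_FILE_VERSION was bumped to 2 in llama.h (both values are assumptions here):

    // magic_sketch.cpp -- what the new write_magic() presumably emits.
    #include <cstdint>
    #include <cstdio>

    #define LLAMA_FILE_MAGIC   'ggjt' // multi-char literal; packs to 0x67676a74 with gcc/clang
    #define LLAMA_FILE_VERSION 2      // assumed current value after this change

    int main() {
        printf("magic = %08x, version = %u\n",
               (uint32_t) LLAMA_FILE_MAGIC, (unsigned) LLAMA_FILE_VERSION);
        // prints: magic = 67676a74, version = 2
        return 0;
    }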
@@ -848,9 +851,11 @@ static const char *llama_file_version_name(llama_file_version version) {
     switch (version) {
         case LLAMA_FILE_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
         case LLAMA_FILE_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
-        case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (latest)";
-        default: LLAMA_ASSERT(false);
+        case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (pre #1405)";
+        case LLAMA_FILE_VERSION_GGJT_V2: return "ggjt v2 (latest)";
     }
+
+    return "unknown";
 }
 
 static const char *llama_ftype_name(enum llama_ftype ftype) {
@@ -928,6 +933,14 @@ static void llama_model_load_internal(
         fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
     }
 
+    if (file_version != LLAMA_FILE_VERSION_GGJT_V2) {
+        if (hparams.ftype != LLAMA_FTYPE_ALL_F32    &&
+            hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 &&
+            hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
+            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1305)");
+        }
+    }
+
     if (vocab_only) {
         return;
     }
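Note (annotation, not part of the diff): the new gate above rejects pre-GGJT-v2 files unless their ftype is one of the still-compatible types (F32, F16, Q8_0). A standalone restatement; check_compat is a hypothetical helper and the enum values are assumed to match llama.h:

    // ftype_gate_sketch.cpp -- restatement of the load-time compatibility check.
    #include <cstdio>
    #include <stdexcept>

    enum llama_ftype { // subset; numeric values assumed to match llama.h
        LLAMA_FTYPE_ALL_F32     = 0,
        LLAMA_FTYPE_MOSTLY_F16  = 1,
        LLAMA_FTYPE_MOSTLY_Q8_0 = 7,
    };

    // Pre-GGJT-v2 files pass only with an ftype the format change did not affect.
    static void check_compat(bool is_ggjt_v2, llama_ftype ftype) {
        if (!is_ggjt_v2 &&
            ftype != LLAMA_FTYPE_ALL_F32    &&
            ftype != LLAMA_FTYPE_MOSTLY_F16 &&
            ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
            throw std::runtime_error("this format is no longer supported (see ggerganov/llama.cpp#1305)");
        }
    }

    int main() {
        try {
            check_compat(/*is_ggjt_v2=*/true,  LLAMA_FTYPE_MOSTLY_F16); // ok: current format
            check_compat(/*is_ggjt_v2=*/false, (llama_ftype) 2);        // old quantized file -> throws
        } catch (const std::exception & e) {
            fprintf(stderr, "%s\n", e.what());
        }
        return 0;
    }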
@@ -2824,9 +2837,9 @@ void llama_print_timings(struct llama_context * ctx) {
 
     fprintf(stderr, "\n");
     fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us / 1000.0);
-    fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample);
+    fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample);
     fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval);
-    fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval);
+    fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval);
     fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0);
 }
 
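Note (annotation, not part of the diff): the wording fix matters because each line already reports a per-token figure: microseconds are scaled to milliseconds by 1e-3 and divided by the run count. A worked restatement with hypothetical numbers:

    // timing_sketch.cpp -- the per-token arithmetic used in llama_print_timings.
    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t t_eval_us = 5'250'000; // hypothetical: 5.25 s spent in eval
        const int32_t n_eval    = 42;        // hypothetical: 42 eval runs
        // 1e-3 * us -> ms; dividing by the run count gives the per-token figure
        printf("eval time = %8.2f ms / %5d runs (%8.2f ms per token)\n",
               1e-3 * t_eval_us, n_eval, 1e-3 * t_eval_us / n_eval);
        // prints: eval time =  5250.00 ms /    42 runs (  125.00 ms per token)
        return 0;
    }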