mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
Cleanup and refactoring pass before supporting newer models for different architectures
This commit is contained in:
parent
60ee00428b
commit
90fe9096b4
12 changed files with 81 additions and 367 deletions
|
@ -21,7 +21,7 @@ struct gptj_hparams {
|
|||
int32_t n_head = 16;
|
||||
int32_t n_layer = 28;
|
||||
int32_t n_rot = 64;
|
||||
int32_t f16 = 1;
|
||||
int32_t ftype = 1;
|
||||
};
|
||||
|
||||
// quantize a model
|
||||
|
@ -64,14 +64,20 @@ bool gptj_model_quantize(const std::string & fname_inp, const std::string & fnam
|
|||
finp.read((char *) &hparams.n_head, sizeof(hparams.n_head));
|
||||
finp.read((char *) &hparams.n_layer, sizeof(hparams.n_layer));
|
||||
finp.read((char *) &hparams.n_rot, sizeof(hparams.n_rot));
|
||||
finp.read((char *) &hparams.f16, sizeof(hparams.f16));
|
||||
finp.read((char *) &hparams.ftype, sizeof(hparams.ftype));
|
||||
|
||||
printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
|
||||
printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx);
|
||||
printf("%s: n_embd = %d\n", __func__, hparams.n_embd);
|
||||
printf("%s: n_head = %d\n", __func__, hparams.n_head);
|
||||
printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
|
||||
printf("%s: f16 = %d\n", __func__, hparams.f16);
|
||||
const int32_t qntvr_src = hparams.ftype / GGML_QNT_VERSION_FACTOR;
|
||||
const int32_t ftype_dst = GGML_QNT_VERSION * GGML_QNT_VERSION_FACTOR + ftype;
|
||||
|
||||
printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
|
||||
printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx);
|
||||
printf("%s: n_embd = %d\n", __func__, hparams.n_embd);
|
||||
printf("%s: n_head = %d\n", __func__, hparams.n_head);
|
||||
printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
|
||||
printf("%s: ftype (src) = %d\n", __func__, hparams.ftype);
|
||||
printf("%s: qntvr (src) = %d\n", __func__, qntvr_src);
|
||||
printf("%s: ftype (dst) = %d\n", __func__, ftype_dst);
|
||||
printf("%s: qntvr (dst) = %d\n", __func__, GGML_QNT_VERSION);
|
||||
|
||||
fout.write((char *) &hparams.n_vocab, sizeof(hparams.n_vocab));
|
||||
fout.write((char *) &hparams.n_ctx, sizeof(hparams.n_ctx));
|
||||
|
@ -79,7 +85,7 @@ bool gptj_model_quantize(const std::string & fname_inp, const std::string & fnam
|
|||
fout.write((char *) &hparams.n_head, sizeof(hparams.n_head));
|
||||
fout.write((char *) &hparams.n_layer, sizeof(hparams.n_layer));
|
||||
fout.write((char *) &hparams.n_rot, sizeof(hparams.n_rot));
|
||||
fout.write((char *) &ftype, sizeof(hparams.f16));
|
||||
fout.write((char *) &ftype_dst, sizeof(ftype_dst));
|
||||
}
|
||||
|
||||
// load vocab
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue