Implemented basic GPU offloading for MPT, GPT-2, GPT-J and GPT-NeoX

Concedo 2023-06-22 00:43:25 +08:00
parent b1f00fa9cc
commit 1b71752a9f
6 changed files with 99 additions and 8 deletions

@@ -671,7 +671,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
 {
     if(file_format==FileFormat::NEOX_6|| file_format==FileFormat::NEOX_7)
     {
-        ModelLoadResult res = gpt_neox_model_load(params.model, neox_ctx_v3, vocab, file_format);
+        ModelLoadResult res = gpt_neox_model_load(params.model, neox_ctx_v3, vocab, file_format, inputs.gpulayers);
         if(res==ModelLoadResult::FAIL)
         {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
@@ -733,7 +733,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     }
     else if(file_format==FileFormat::MPT_1)
     {
-        bool res = mpt_model_load(params.model, mpt_ctx_v3, vocab);
+        bool res = mpt_model_load(params.model, mpt_ctx_v3, vocab, inputs.gpulayers);
         if(res==false)
         {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
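
The diff shows only the call sites gaining an extra gpulayers argument; the loader bodies that consume it are not part of this excerpt. Below is a minimal sketch of the offloading pattern, assuming the loaders clamp the requested layer count and flag the first N layers for GPU residency. The types Backend, Layer, Model and the function offload_layers are hypothetical stand-ins for illustration, not the actual koboldcpp/ggml API.

#include <algorithm>
#include <cstdio>
#include <vector>

// Illustrative stand-ins for the real ggml model structures (hypothetical).
enum class Backend { CPU, GPU };

struct Layer {
    Backend backend = Backend::CPU;  // where this layer's weights live
};

struct Model {
    std::vector<Layer> layers;
};

// Mark the first `gpulayers` transformer layers for GPU residency,
// clamped to the number of layers the model actually has.
static void offload_layers(Model & model, int gpulayers) {
    const int n_gpu = std::max(0, std::min<int>(gpulayers, (int) model.layers.size()));
    for (int i = 0; i < n_gpu; ++i) {
        model.layers[i].backend = Backend::GPU;
        // In the real loaders, this is the point where each layer's weight
        // tensors would be copied or transformed into GPU memory.
    }
    std::printf("offloading %d of %zu layers to GPU\n", n_gpu, model.layers.size());
}

int main() {
    Model model;
    model.layers.resize(32);      // e.g. a 32-layer GPT-NeoX model
    offload_layers(model, 20);    // e.g. inputs.gpulayers == 20
    return 0;
}

Clamping up front keeps an inputs.gpulayers value larger than the model's actual layer count from walking off the end of the layer array.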