Implemented basic GPU offloading for MPT, GPT-2, GPT-J and GPT-NeoX

Concedo 2023-06-22 00:43:25 +08:00
parent b1f00fa9cc
commit 1b71752a9f
6 changed files with 99 additions and 8 deletions

@@ -671,7 +671,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
 {
     if(file_format==FileFormat::NEOX_6|| file_format==FileFormat::NEOX_7)
     {
-        ModelLoadResult res = gpt_neox_model_load(params.model, neox_ctx_v3, vocab, file_format);
+        ModelLoadResult res = gpt_neox_model_load(params.model, neox_ctx_v3, vocab, file_format, inputs.gpulayers);
         if(res==ModelLoadResult::FAIL)
         {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
@@ -733,7 +733,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     }
     else if(file_format==FileFormat::MPT_1)
     {
-        bool res = mpt_model_load(params.model, mpt_ctx_v3, vocab);
+        bool res = mpt_model_load(params.model, mpt_ctx_v3, vocab, inputs.gpulayers);
         if(res==false)
         {
             fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
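
The diff shows only the call sites gaining an extra gpulayers argument; the loader bodies that consume it are not part of this excerpt. Below is a minimal sketch of the offloading pattern, assuming the loaders clamp the requested layer count and flag the first N layers for GPU residency. The types Backend, Layer, Model and the function offload_layers are hypothetical stand-ins for illustration, not the actual koboldcpp/ggml API.

#include <algorithm>
#include <cstdio>
#include <vector>

// Illustrative stand-ins for the real ggml model structures (hypothetical).
enum class Backend { CPU, GPU };

struct Layer {
    Backend backend = Backend::CPU;  // where this layer's weights live
};

struct Model {
    std::vector<Layer> layers;
};

// Mark the first `gpulayers` transformer layers for GPU residency,
// clamped to the number of layers the model actually has.
static void offload_layers(Model & model, int gpulayers) {
    const int n_gpu = std::max(0, std::min<int>(gpulayers, (int) model.layers.size()));
    for (int i = 0; i < n_gpu; ++i) {
        model.layers[i].backend = Backend::GPU;
        // In the real loaders, this is the point where each layer's weight
        // tensors would be copied or transformed into GPU memory.
    }
    std::printf("offloading %d of %zu layers to GPU\n", n_gpu, model.layers.size());
}

int main() {
    Model model;
    model.layers.resize(32);      // e.g. a 32-layer GPT-NeoX model
    offload_layers(model, 20);    // e.g. inputs.gpulayers == 20
    return 0;
}

Clamping up front keeps an inputs.gpulayers value larger than the model's actual layer count from walking off the end of the layer array.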