mirror of https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00

fixed incorrect auto gpu settings, fixed clblast not working

This commit is contained in:
parent 004a35b16d
commit 229108f877

2 changed files with 9 additions and 3 deletions
@@ -1174,6 +1174,8 @@ static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML
 #define GET_CAUSE(node) ""
 #endif
 
+static bool backend_prealloc_warn = false;
+
 // returns the backend that should be used for the node based on the current locations
 static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) {
     // TODO: use supports_op to check if the backend supports the op
@@ -1196,7 +1198,11 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
 
     if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
         // since the tensor is pre-allocated, it cannot be moved to another backend
-        GGML_ABORT("pre-allocated tensor in a backend that cannot run the operation");
+        if(!backend_prealloc_warn)
+        {
+            backend_prealloc_warn = true;
+            printf("\nCaution: pre-allocated tensor in a backend that cannot run the operation\n");
+        }
     }
 
     // graph input
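The hunk above demotes a fatal GGML_ABORT to a one-shot console warning: a static flag ensures the caution prints only for the first offending tensor instead of killing the process. A minimal sketch of the same warn-once pattern in Python (the function and flag names here are illustrative, not from the repo):

```python
import sys

_prealloc_warned = False  # module-level flag, analogous to the static backend_prealloc_warn

def warn_prealloc_once():
    """Emit the caution at most once per process, then stay silent."""
    global _prealloc_warned
    if not _prealloc_warned:
        _prealloc_warned = True
        print("\nCaution: pre-allocated tensor in a backend that cannot run the operation",
              file=sys.stderr)

for _ in range(3):
    warn_prealloc_once()  # prints on the first call only
```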
@@ -41,7 +41,7 @@ maxhordelen = 400
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.75"
+KcppVersion = "1.75.1"
 showdebug = True
 guimode = False
 showsamplerwarning = True
@@ -4038,7 +4038,7 @@ def main(launch_args,start_server=True):
             print(f"MacOS detected: Auto GPU layers set to maximum")
             args.gpulayers = 200
         elif not shouldavoidgpu and args.model_param and os.path.exists(args.model_param):
-            if not args.usecublas and (args.usevulkan is None) and not args.useclblast:
+            if (args.usecublas is None) and (args.usevulkan is None) and (args.useclblast is None):
                 print("No GPU or CPU backend was selected. Trying to assign one for you automatically...")
                 auto_set_backend_cli()
             if MaxMemory[0] == 0: #try to get gpu vram for cuda if not picked yet
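This hunk replaces truthiness checks with explicit is-None comparisons. In argparse, a flag declared with `nargs="*"` defaults to None when absent but stores an empty list when passed with no values; the empty list is falsy, so `not args.usecublas` misreads an explicitly selected backend as "none selected" and lets the auto-picker clobber it. Comparing against None distinguishes "flag not passed" from "flag passed without values". A minimal, self-contained sketch of the pitfall, using a hypothetical parser and assuming `nargs="*"` (an assumption, not koboldcpp's actual argument definitions):

```python
import argparse

# Hypothetical parser for illustration only; not koboldcpp's real definitions.
parser = argparse.ArgumentParser()
parser.add_argument("--usecublas", nargs="*", default=None)

args = parser.parse_args(["--usecublas"])  # flag given with no values -> []

print(not args.usecublas)      # True: falsy [] misreads the flag as "not passed"
print(args.usecublas is None)  # False: the is-None test correctly sees it was passed
```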