diff --git a/ggml/src/ggml-backend.c b/ggml/src/ggml-backend.c
index ba280e064..c66dba0eb 100644
--- a/ggml/src/ggml-backend.c
+++ b/ggml/src/ggml-backend.c
@@ -1174,6 +1174,8 @@ static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML
 #define GET_CAUSE(node) ""
 #endif
 
+static bool backend_prealloc_warn = false;
+
 // returns the backend that should be used for the node based on the current locations
 static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) {
     // TODO: use supports_op to check if the backend supports the op
@@ -1196,7 +1198,11 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
 
     if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
         // since the tensor is pre-allocated, it cannot be moved to another backend
-        GGML_ABORT("pre-allocated tensor in a backend that cannot run the operation");
+        if(!backend_prealloc_warn)
+        {
+            backend_prealloc_warn = true;
+            printf("\nCaution: pre-allocated tensor in a backend that cannot run the operation\n");
+        }
     }
 
     // graph input
diff --git a/koboldcpp.py b/koboldcpp.py
index 6c34f2e4a..a63e66491 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -41,7 +41,7 @@ maxhordelen = 400
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.75"
+KcppVersion = "1.75.1"
 showdebug = True
 guimode = False
 showsamplerwarning = True
@@ -4038,7 +4038,7 @@ def main(launch_args,start_server=True):
             print(f"MacOS detected: Auto GPU layers set to maximum")
             args.gpulayers = 200
         elif not shouldavoidgpu and args.model_param and os.path.exists(args.model_param):
-            if not args.usecublas and (args.usevulkan is None) and not args.useclblast:
+            if (args.usecublas is None) and (args.usevulkan is None) and (args.useclblast is None):
                 print("No GPU or CPU backend was selected. Trying to assign one for you automatically...")
                 auto_set_backend_cli()
             if MaxMemory[0] == 0: #try to get gpu vram for cuda if not picked yet
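
The ggml-backend.c hunk replaces a hard GGML_ABORT with a warn-once guard: a file-scope static bool flag ensures the message is printed the first time the condition is hit, and later hits pass through silently. A minimal standalone C sketch of that pattern is below; the helper warn_prealloc_once and the main driver are hypothetical and only illustrate the guard, and, like the patch, the plain static flag is not synchronized across threads.

#include <stdio.h>
#include <stdbool.h>

// Warn-once guard: print the message only on the first call.
// Hypothetical helper for illustration; not part of the patch.
static void warn_prealloc_once(void) {
    static bool warned = false;   // plain static flag, as in the patch (not thread-safe)
    if (!warned) {
        warned = true;
        printf("\nCaution: pre-allocated tensor in a backend that cannot run the operation\n");
    }
}

int main(void) {
    // The message is emitted once; the second and third calls are silent.
    for (int i = 0; i < 3; i++) {
        warn_prealloc_once();
    }
    return 0;
}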