Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 09:34:37 +00:00)
fixed incorrect auto gpu settings, fixed clblast not working
commit 229108f877
parent 004a35b16d
2 changed files with 9 additions and 3 deletions
@@ -1174,6 +1174,8 @@ static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML
 #define GET_CAUSE(node) ""
 #endif
 
+static bool backend_prealloc_warn = false;
+
 // returns the backend that should be used for the node based on the current locations
 static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, struct ggml_tensor * tensor) {
     // TODO: use supports_op to check if the backend supports the op

@@ -1196,7 +1198,11 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
 
     if (tensor->buffer || (tensor->view_src && tensor->view_src->buffer)) {
         // since the tensor is pre-allocated, it cannot be moved to another backend
-        GGML_ABORT("pre-allocated tensor in a backend that cannot run the operation");
+        if(!backend_prealloc_warn)
+        {
+            backend_prealloc_warn = true;
+            printf("\nCaution: pre-allocated tensor in a backend that cannot run the operation\n");
+        }
     }
 
     // graph input
@@ -41,7 +41,7 @@ maxhordelen = 400
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.75"
+KcppVersion = "1.75.1"
 showdebug = True
 guimode = False
 showsamplerwarning = True

@@ -4038,7 +4038,7 @@ def main(launch_args,start_server=True):
         print(f"MacOS detected: Auto GPU layers set to maximum")
         args.gpulayers = 200
     elif not shouldavoidgpu and args.model_param and os.path.exists(args.model_param):
-        if not args.usecublas and (args.usevulkan is None) and not args.useclblast:
+        if (args.usecublas is None) and (args.usevulkan is None) and (args.useclblast is None):
             print("No GPU or CPU backend was selected. Trying to assign one for you automatically...")
             auto_set_backend_cli()
             if MaxMemory[0] == 0: #try to get gpu vram for cuda if not picked yet
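The behavioral fix in the backend auto-selection is the switch from truthiness tests to explicit "is None" checks. Below is a minimal sketch of why that matters, assuming the backend flags are argparse options that default to None and can hold falsy values when passed bare (the flag definitions here are an illustrative assumption, not taken from this commit):

import argparse

# Minimal sketch, not koboldcpp's actual argument definitions.
parser = argparse.ArgumentParser()
parser.add_argument("--usecublas", nargs="*", default=None)            # hypothetical definition
parser.add_argument("--useclblast", nargs=2, type=int, default=None)   # hypothetical definition
parser.add_argument("--usevulkan", nargs="*", type=int, default=None)  # hypothetical definition

args = parser.parse_args(["--usecublas"])  # user explicitly picked a GPU backend, no extra options

# Old-style truthiness check: a bare flag parses to an empty list, which is falsy,
# so this wrongly concludes no backend was chosen and falls through to auto-detection.
print(not args.usecublas and (args.usevulkan is None) and not args.useclblast)  # True

# New check: "is None" only matches flags that were never passed at all,
# so an explicit backend choice is respected.
print((args.usecublas is None) and (args.usevulkan is None) and (args.useclblast is None))  # False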