mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-20 09:25:53 +00:00
Merge branch 'concedo' into concedo_experimental
This commit is contained in:
commit
301a04adfc
3 changed files with 6 additions and 7 deletions
|
|
@ -1065,7 +1065,6 @@ struct ggml_cuda_graph {
|
|||
cudaGraphExec_t instance = nullptr;
|
||||
size_t num_nodes = 0;
|
||||
std::vector<cudaGraphNode_t> nodes;
|
||||
std::vector<cudaKernelNodeParams> params;
|
||||
bool disable_due_to_gpu_arch = false;
|
||||
bool disable_due_to_too_many_updates = false;
|
||||
bool disable_due_to_failed_graph_capture = false;
|
||||
|
|
|
|||
|
|
@ -3708,6 +3708,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
|
|||
}
|
||||
|
||||
static bool ggml_cuda_set_cuda_graph_enabled(ggml_backend_cuda_context * cuda_ctx) {
|
||||
|
||||
#ifdef USE_CUDA_GRAPH
|
||||
static const bool disable_cuda_graphs_due_to_env = (getenv("GGML_CUDA_DISABLE_GRAPHS") != nullptr);
|
||||
|
||||
|
|
@ -3748,17 +3749,15 @@ static bool ggml_cuda_set_cuda_graph_enabled(ggml_backend_cuda_context * cuda_ct
|
|||
static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
|
||||
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *) backend->context;
|
||||
|
||||
ggml_cuda_set_device(cuda_ctx->device);
|
||||
|
||||
bool use_cuda_graph = false;
|
||||
bool cuda_graph_update_required = false;
|
||||
|
||||
// graph_optimize calls set_cuda_graph_enabled, in-case it not called (i.e. graph_compute is directly called)
|
||||
// we call it here instead.
|
||||
#ifdef USE_CUDA_GRAPH
|
||||
if (!cuda_ctx->cuda_graph) {
|
||||
use_cuda_graph = ggml_cuda_set_cuda_graph_enabled(cuda_ctx);
|
||||
} else {
|
||||
use_cuda_graph = cuda_ctx->cuda_graph && cuda_ctx->cuda_graph->cuda_graphs_enabled;
|
||||
}
|
||||
use_cuda_graph = ggml_cuda_set_cuda_graph_enabled(cuda_ctx);
|
||||
|
||||
if (use_cuda_graph) {
|
||||
cuda_graph_update_required = is_cuda_graph_update_required(cuda_ctx, cgraph);
|
||||
|
|
@ -3774,6 +3773,7 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend,
|
|||
|
||||
if (cuda_ctx->cuda_graph->number_consecutive_updates >= 4) {
|
||||
cuda_ctx->cuda_graph->disable_due_to_too_many_updates = true;
|
||||
cuda_ctx->cuda_graph->cuda_graphs_enabled = false;
|
||||
#ifndef NDEBUG
|
||||
GGML_LOG_DEBUG("%s: disabling CUDA graphs due to too many consecutive updates\n", __func__);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ dry_seq_break_max = 128
|
|||
extra_images_max = 4 # for kontext/qwen img
|
||||
|
||||
# global vars
|
||||
KcppVersion = "1.105.2"
|
||||
KcppVersion = "1.105.3"
|
||||
showdebug = True
|
||||
kcpp_instance = None #global running instance
|
||||
global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_config_target":""}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue