Merge commit '017cc5f446' into concedo_experimental

# Conflicts:
#	.github/ISSUE_TEMPLATE/010-bug-compilation.yml
#	.github/ISSUE_TEMPLATE/019-bug-misc.yml
#	CODEOWNERS
#	examples/batched-bench/batched-bench.cpp
#	examples/batched/batched.cpp
#	examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
#	examples/gritlm/gritlm.cpp
#	examples/llama-bench/llama-bench.cpp
#	examples/passkey/passkey.cpp
#	examples/quantize-stats/quantize-stats.cpp
#	examples/run/run.cpp
#	examples/simple-chat/simple-chat.cpp
#	examples/simple/simple.cpp
#	examples/tokenize/tokenize.cpp
#	ggml/CMakeLists.txt
#	ggml/src/ggml-metal/CMakeLists.txt
#	ggml/src/ggml-vulkan/CMakeLists.txt
#	scripts/sync-ggml.last
#	src/llama.cpp
#	tests/test-autorelease.cpp
#	tests/test-model-load-cancel.cpp
#	tests/test-tokenizer-0.cpp
#	tests/test-tokenizer-1-bpe.cpp
#	tests/test-tokenizer-1-spm.cpp
This commit is contained in:
Concedo 2025-01-08 23:15:21 +08:00
commit dcfa1eca4e
45 changed files with 806 additions and 229 deletions

View file

@ -770,7 +770,7 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
if (tensor->op != GGML_OP_ROPE && src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
int src_backend_id = ggml_backend_sched_backend_from_buffer(sched, src, tensor);
// check if a backend with higher prio wants to offload the op
if (src_backend_id == sched->n_backends - 1) {
if (src_backend_id == sched->n_backends - 1 && ggml_backend_buffer_is_host(src->buffer)) {
for (int b = 0; b < src_backend_id; b++) {
if (ggml_backend_supports_op(sched->backends[b], tensor) && ggml_backend_offload_op(sched->backends[b], tensor)) {
SET_CAUSE(tensor, "1.off");
@ -801,9 +801,12 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
for (int i = 0; i < graph->n_nodes; i++) {
if (cur_split < sched->n_splits && i == sched->splits[cur_split].i_start) {
ggml_backend_t split_backend = sched->backends[sched->splits[cur_split].backend_id];
GGML_LOG_DEBUG("\n## SPLIT #%d: %s # %d inputs: ", cur_split, ggml_backend_name(split_backend),
GGML_LOG_DEBUG("\n## SPLIT #%d: %s # %d inputs", cur_split, ggml_backend_name(split_backend),
sched->splits[cur_split].n_inputs);
for (int j = 0; j < sched->splits[cur_split].n_inputs; j++) {
if (j == 0) {
GGML_LOG_DEBUG(": ");
}
GGML_LOG_DEBUG("[%s (%5.5s)] ", sched->splits[cur_split].inputs[j]->name,
fmt_size(ggml_nbytes(sched->splits[cur_split].inputs[j])));
}