Merge commit '9f102a1407' into concedo_experimental

# Conflicts:
#	.devops/intel.Dockerfile
#	.github/ISSUE_TEMPLATE/010-bug-compilation.yml
#	.github/ISSUE_TEMPLATE/011-bug-results.yml
#	.github/pull_request_template.md
#	CODEOWNERS
#	README.md
#	common/CMakeLists.txt
#	ggml/src/ggml-hexagon/ggml-hexagon.cpp
#	ggml/src/ggml-hexagon/htp/binary-ops.c
#	ggml/src/ggml-hexagon/htp/hex-dma.c
#	ggml/src/ggml-hexagon/htp/hex-dma.h
#	ggml/src/ggml-hexagon/htp/hex-dump.h
#	ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c
#	ggml/src/ggml-hexagon/htp/hvx-utils.h
#	ggml/src/ggml-hexagon/htp/main.c
#	ggml/src/ggml-hexagon/htp/ssm-conv.c
#	ggml/src/ggml-opencl/CMakeLists.txt
#	ggml/src/ggml-opencl/ggml-opencl.cpp
#	ggml/src/ggml-opencl/kernels/cvt.cl
#	ggml/src/ggml-rpc/ggml-rpc.cpp
#	scripts/snapdragon/adb/run-bench.sh
#	scripts/sync_vendor.py
#	tests/test-backend-ops.cpp
#	tools/llama-bench/llama-bench.cpp
This commit is contained in:
Concedo 2026-03-25 23:45:41 +08:00
commit c00fe0af5a
32 changed files with 1302 additions and 447 deletions

View file

@@ -350,14 +350,6 @@ llama_context::llama_context(
if (cparams.pipeline_parallel) {
LLAMA_LOG_INFO("%s: pipeline parallelism enabled\n", __func__);
if (!graph_reuse_disable) {
// TODO: figure out a way to make graph reuse work with pipeline parallelism
// ref: https://github.com/ggml-org/llama.cpp/pull/20463
LLAMA_LOG_WARN("%s: graph reuse is currently not compatible with pipeline parallelism - disabling\n", __func__);
graph_reuse_disable = true;
}
}
sched_reserve();
@@ -1199,6 +1191,13 @@ llm_graph_result * llama_context::process_ubatch(const llama_ubatch & ubatch, ll
if (!graph_reuse_disable && res->can_reuse(gparams)) {
//LLAMA_LOG_DEBUG("%s: reusing previous graph\n", __func__);
// with pipeline parallelism, the previous graph_compute_async may still be running
// on the GPU. we must synchronize before set_inputs to avoid overwriting input tensors
// that the previous compute is still reading.
if (cparams.pipeline_parallel) {
ggml_backend_sched_synchronize(sched.get());
}
n_reused++;
} else {
res->reset();