Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	.github/workflows/release.yml
#	docs/android.md
#	docs/backend/hexagon/CMakeUserPresets.json
#	examples/llama.android/app/src/main/res/layout/activity_main.xml
#	examples/llama.android/app/src/main/res/layout/item_message_assistant.xml
#	examples/llama.android/app/src/main/res/layout/item_message_user.xml
#	examples/model-conversion/scripts/causal/run-org-model.py
#	examples/model-conversion/scripts/utils/common.py
#	ggml/CMakeLists.txt
#	ggml/src/ggml-hexagon/CMakeLists.txt
#	ggml/src/ggml-hexagon/htp/CMakeLists.txt
#	ggml/src/ggml-hexagon/htp/matmul-ops.c
#	tests/test-arg-parser.cpp
#	tools/server/README.md
This commit is contained in:
Concedo 2025-12-20 09:32:06 +08:00
commit 7304640f72
23 changed files with 1317 additions and 625 deletions

View file

@@ -1109,10 +1109,10 @@ bool llama_model_loader::load_all_data(
} else {
// If upload_backend is valid load the tensor in chunks to pinned memory and upload the buffers asynchronously to the GPU.
if (upload_backend) {
- auto offset = (off_t) weight->offs;
+ size_t offset = weight->offs;
alignment = file->read_alignment();
- off_t aligned_offset = offset & ~(alignment - 1);
- off_t offset_from_alignment = offset - aligned_offset;
+ size_t aligned_offset = offset & ~(alignment - 1);
+ size_t offset_from_alignment = offset - aligned_offset;
file->seek(aligned_offset, SEEK_SET);
// Calculate aligned read boundaries