Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/ISSUE_TEMPLATE/010-bug-compilation.yml
#	.github/ISSUE_TEMPLATE/011-bug-results.yml
#	.github/ISSUE_TEMPLATE/019-bug-misc.yml
#	.github/ISSUE_TEMPLATE/020-enhancement.yml
#	.github/ISSUE_TEMPLATE/030-research.yml
#	.github/ISSUE_TEMPLATE/040-refactor.yml
#	ggml/CMakeLists.txt
#	ggml/src/ggml-cann/ggml-cann.cpp
#	ggml/src/ggml-hexagon/CMakeLists.txt
#	ggml/src/ggml-hexagon/ggml-hexagon.cpp
#	ggml/src/ggml-hexagon/htp/CMakeLists.txt
#	ggml/src/ggml-hexagon/htp/cmake-toolchain.cmake
#	ggml/src/ggml-hexagon/htp/flash-attn-ops.c
#	ggml/src/ggml-hexagon/htp/hex-utils.h
#	ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c
#	ggml/src/ggml-hexagon/htp/hmx-ops.h
#	ggml/src/ggml-hexagon/htp/hmx-utils.h
#	ggml/src/ggml-hexagon/htp/hvx-base.h
#	ggml/src/ggml-hexagon/htp/hvx-copy.h
#	ggml/src/ggml-hexagon/htp/hvx-exp.h
#	ggml/src/ggml-hexagon/htp/unary-ops.c
#	ggml/src/ggml-opencl/CMakeLists.txt
#	ggml/src/ggml-opencl/ggml-opencl.cpp
#	ggml/src/ggml-opencl/kernels/cvt.cl
#	ggml/src/ggml-rpc/ggml-rpc.cpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-virtgpu/ggml-backend.cpp
#	ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp
#	ggml/src/ggml-webgpu/ggml-webgpu.cpp
#	ggml/src/ggml-webgpu/wgsl-shaders/mul_mat_vec.wgsl
#	ggml/src/ggml-zdnn/ggml-zdnn.cpp
#	ggml/src/ggml-zendnn/ggml-zendnn.cpp
#	scripts/sync-ggml.last
#	tests/test-backend-ops.cpp
2026-05-18 23:49:46 +00:00 · 2026-05-02 18:07:50 +08:00 · 2026-05-02 18:07:50 +08:00 · 7c70187e26
commit 7c70187e26
parent 8b62e7b667 63d93d1733
190 changed files with 11572 additions and 7414 deletions
--- a/src/llama-mmap.cpp
+++ b/src/llama-mmap.cpp
@@ -40,6 +40,14 @@
 #include <TargetConditionals.h>
 #endif

+#ifdef _WIN32
+#    define llama_mmap_ftell _ftelli64
+#    define llama_mmap_fseek _fseeki64
+#else
+#    define llama_mmap_ftell ftello
+#    define llama_mmap_fseek fseeko
+#endif
+
 // TODO: consider moving to llama-impl.h if needed in more places
 #if defined(_WIN32)
 static std::string llama_format_win_err(DWORD err) {
@@ -226,7 +234,7 @@ struct llama_file::impl {

    size_t tell() const {
        if (fd == -1) {
-            long ret = std::ftell(fp);
+            off_t ret = llama_mmap_ftell(fp);
            if (ret == -1) {
                throw std::runtime_error(format("ftell error: %s", strerror(errno)));
            }
@@ -244,7 +252,7 @@ struct llama_file::impl {
    void seek(size_t offset, int whence) const {
        off_t ret = 0;
        if (fd == -1) {
-            ret = std::fseek(fp, (long) offset, whence);
+            ret = llama_mmap_fseek(fp, offset, whence);
        } else {
            ret = lseek(fd, offset, whence);
        }
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -685,9 +685,9 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_mod
                        LLAMA_LOG_WARN("%s: %-36s - applying manual override: %s -> %s\n",
                                       __func__, tensor_name.c_str(), ggml_type_name(new_type), ggml_type_name(qtype));
                        new_type = qtype;
-                        manual = true;
-                        break;
                    }
+                    manual = true;
+                    break;
                }
            }
        }