Merge branch 'upstream' into concedo_experimental

# Conflicts: # .github/workflows/build.yml # .github/workflows/server.yml # CMakeLists.txt # Makefile # examples/embedding/embedding.cpp # examples/imatrix/imatrix.cpp # examples/llama-bench/llama-bench.cpp # examples/llava/MobileVLM-README.md # examples/parallel/parallel.cpp # examples/perplexity/perplexity.cpp # examples/quantize/CMakeLists.txt # examples/server/README.md # examples/speculative/speculative.cpp # tests/test-backend-ops.cpp
2025-09-11 09:34:37 +00:00 · 2024-09-13 16:17:24 +08:00 · 2024-09-13 16:17:24 +08:00 · e44ddf26ef
commit e44ddf26ef
parent 0fd85c3940 0abc6a2c25
47 changed files with 117978 additions and 117646 deletions
--- a/examples/batched.swift/Sources/main.swift
+++ b/examples/batched.swift/Sources/main.swift
@ -200,8 +200,8 @@ let t_main_end = ggml_time_us()

 print("decoded \(n_decode) tokens in \(String(format: "%.2f", Double(t_main_end - t_main_start) / 1_000_000.0)) s, speed: \(String(format: "%.2f", Double(n_decode) / (Double(t_main_end - t_main_start) / 1_000_000.0))) t/s\n\n")

-llama_perf_print(UnsafeRawPointer(context), LLAMA_PERF_TYPE_CONTEXT)
-llama_perf_print(UnsafeRawPointer(smpl),    LLAMA_PERF_TYPE_SAMPLER_CHAIN)
+llama_perf_sampler_print(smpl)
+llama_perf_context_print(context)

 private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
    let utf8Count = text.utf8.count