Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/full-cuda.Dockerfile
#	.devops/nix/devshells.nix
#	.devops/nix/nixpkgs-instances.nix
#	.devops/nix/package.nix
#	.devops/nix/scope.nix
#	README.md
#	docs/docker.md
#	examples/llama-bench/llama-bench.cpp
#	flake.lock
#	flake.nix
#	grammars/README.md
#	src/llama.cpp
2025-09-10 17:14:36 +00:00 · 2024-09-06 01:07:31 +08:00 · 2024-09-06 01:07:31 +08:00 · 73dca7e5bc
commit 73dca7e5bc
parent d777995991 bdf314f38a
24 changed files with 2747 additions and 666 deletions
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -387,8 +387,8 @@ int main(int argc, char ** argv) {
    }

    LOGLN(
-            "recalculate the cached logits (check): embd_inp.empty() %s, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu, embd_inp.size() %zu",
-            log_tostr(embd_inp.empty()), n_matching_session_tokens, embd_inp.size(), session_tokens.size(), embd_inp.size());
+            "recalculate the cached logits (check): embd_inp.empty() %s, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu",
+            log_tostr(embd_inp.empty()), n_matching_session_tokens, embd_inp.size(), session_tokens.size());

    // if we will use the cache for the full prompt without reaching the end of the cache, force
    // reevaluation of the last token to recalculate the cached logits