Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/full-cuda.Dockerfile
#	.devops/nix/devshells.nix
#	.devops/nix/nixpkgs-instances.nix
#	.devops/nix/package.nix
#	.devops/nix/scope.nix
#	README.md
#	docs/docker.md
#	examples/llama-bench/llama-bench.cpp
#	flake.lock
#	flake.nix
#	grammars/README.md
#	src/llama.cpp
This commit is contained in:
Concedo 2024-09-06 01:07:31 +08:00
commit 73dca7e5bc
24 changed files with 2747 additions and 666 deletions

View file

@@ -387,8 +387,8 @@ int main(int argc, char ** argv) {
}
LOGLN(
-"recalculate the cached logits (check): embd_inp.empty() %s, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu, embd_inp.size() %zu",
-log_tostr(embd_inp.empty()), n_matching_session_tokens, embd_inp.size(), session_tokens.size(), embd_inp.size());
+"recalculate the cached logits (check): embd_inp.empty() %s, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu",
+log_tostr(embd_inp.empty()), n_matching_session_tokens, embd_inp.size(), session_tokens.size());
// if we will use the cache for the full prompt without reaching the end of the cache, force
// reevaluation of the last token to recalculate the cached logits