Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	CMakeLists.txt
#	README.md
#	ci/run.sh
#	llama.cpp
#	models/ggml-vocab-llama-bpe.gguf.inp
#	models/ggml-vocab-llama-bpe.gguf.out
#	requirements.txt
#	scripts/compare-llama-bench.py
#	scripts/sync-ggml.last
#	tests/CMakeLists.txt
#	tests/test-backend-ops.cpp
#	tests/test-grammar-integration.cpp
#	tests/test-tokenizer-1-bpe.cpp
This commit is contained in:
Concedo 2024-05-14 19:28:47 +08:00
commit 2ee808a747
66 changed files with 3034 additions and 1821 deletions

View file

@ -524,6 +524,10 @@ int main(int argc, char ** argv) {
}
struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
if (!ctx_sampling) {
fprintf(stderr, "%s: failed to initialize sampling subsystem\n", __func__);
exit(1);
}
while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
// predict
@ -880,7 +884,7 @@ int main(int argc, char ** argv) {
}
const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
const auto line_inp = ::llama_tokenize(ctx, buffer, false, params.interactive_specials);
const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());