Force-clear some DRY sampler state variables at the start of each new generation (unclear whether this helps)

This commit is contained in:
Concedo 2024-08-14 21:35:39 +08:00
parent 689a17d756
commit e12ab53488
2 changed files with 5 additions and 2 deletions

View file

@@ -26,7 +26,7 @@ set(LLAMA_GPROF OFF)
set(LLAMA_SANITIZE_THREAD OFF) set(LLAMA_SANITIZE_THREAD OFF)
set(LLAMA_SANITIZE_ADDRESS OFF) set(LLAMA_SANITIZE_ADDRESS OFF)
set(LLAMA_SANITIZE_UNDEFINED OFF) set(LLAMA_SANITIZE_UNDEFINED OFF)
set(LLAMA_SCHED_MAX_COPIES "2" CACHE STRING "llama: max input copies for pipeline parallelism")
# instruction set specific # instruction set specific
option(LLAMA_AVX "llama: enable AVX" ON) option(LLAMA_AVX "llama: enable AVX" ON)
@@ -67,7 +67,6 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
add_compile_definitions(LOG_DISABLE_LOGS) add_compile_definitions(LOG_DISABLE_LOGS)
add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})
file(GLOB GGML_SOURCES_CUDA "ggml/src/ggml-cuda/*.cu") file(GLOB GGML_SOURCES_CUDA "ggml/src/ggml-cuda/*.cu")
list(APPEND GGML_SOURCES_CUDA "ggml/src/ggml-cuda.cu") list(APPEND GGML_SOURCES_CUDA "ggml/src/ggml-cuda.cu")

View file

@@ -1949,6 +1949,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
last_stop_reason = stop_reason::OUT_OF_TOKENS; last_stop_reason = stop_reason::OUT_OF_TOKENS;
stop_sequence.clear(); stop_sequence.clear();
special_stop_sequence.clear(); special_stop_sequence.clear();
dry_repeat_count.clear();
dry_sequence_breakers.clear();
dry_max_token_repeat.clear();
for(int x=0;x<stop_token_max;++x) for(int x=0;x<stop_token_max;++x)
{ {
std::string stopper = inputs.stop_sequence[x]; std::string stopper = inputs.stop_sequence[x];