From e12ab534886eaeb65a3bbb5340a3b56f59cb3d1f Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Wed, 14 Aug 2024 21:35:39 +0800 Subject: [PATCH] force clear some DRY state vars on new generation - not sure if this helps --- CMakeLists.txt | 3 +-- gpttype_adapter.cpp | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e2cf02347..c296aa884 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,7 @@ set(LLAMA_GPROF OFF) set(LLAMA_SANITIZE_THREAD OFF) set(LLAMA_SANITIZE_ADDRESS OFF) set(LLAMA_SANITIZE_UNDEFINED OFF) -set(LLAMA_SCHED_MAX_COPIES "2" CACHE STRING "llama: max input copies for pipeline parallelism") + # instruction set specific option(LLAMA_AVX "llama: enable AVX" ON) @@ -67,7 +67,6 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) add_compile_definitions(LOG_DISABLE_LOGS) -add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES}) file(GLOB GGML_SOURCES_CUDA "ggml/src/ggml-cuda/*.cu") list(APPEND GGML_SOURCES_CUDA "ggml/src/ggml-cuda.cu") diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index d5d5d8d0c..a298af40b 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1949,6 +1949,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs) last_stop_reason = stop_reason::OUT_OF_TOKENS; stop_sequence.clear(); special_stop_sequence.clear(); + dry_repeat_count.clear(); + dry_sequence_breakers.clear(); + dry_max_token_repeat.clear(); + for(int x=0;x