From fa22c1a5a4dfc92a5cced1211fdfca627e29b7b3 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Wed, 7 May 2025 18:30:36 +0800 Subject: [PATCH] fixed cfg scale, but turns out it sucks. embedded aria2c into pyinstaller --- CMakeLists.txt | 3 ++- ggml/src/ggml-cuda/ggml-cuda.cu | 2 +- gpttype_adapter.cpp | 19 ++++++++++++++++--- koboldcpp.py | 7 +++++-- make_pyinstaller.bat | 2 +- make_pyinstaller_cuda.bat | 2 +- make_pyinstaller_cuda12.bat | 2 +- make_pyinstaller_cuda_oldcpu.bat | 2 +- 8 files changed, 28 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e5ac087a8..143b6894f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -130,8 +130,9 @@ if (LLAMA_CUBLAS) # 61 == integer CUDA intrinsics # 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster # 75 == int8 tensor cores + # 80 == Ampere, asynchronous data loading, faster tensor core instructions if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) - set(CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual") # needed for f16 CUDA intrinsics + set(CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual;80-virtual") # needed for f16 CUDA intrinsics else() message("CUDA Toolkit Version: ${CUDAToolkit_VERSION}") if(CUDAToolkit_VERSION VERSION_GREATER 12) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index af84257d2..581a725d5 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -221,7 +221,7 @@ static ggml_cuda_device_info ggml_cuda_init() { //#else // GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__); //#endif // GGML_CUDA_FORCE_CUBLAS - GGML_LOG_INFO("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes for first launch...\n---\n"); + GGML_LOG_INFO("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes (only for first launch)...\nJust a moment, Please Be Patient...\n---\n"); GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count); for (int id = 0; id < info.device_count; ++id) { int device_vmm = 0; diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 35fa9b81f..a84a7ffde 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1587,6 +1587,15 @@ void sample_guidance(struct llama_context * ctx, struct llama_context * guidance scale = 0; } + if(debugmode==1 && !is_quiet) + { + int topidx1 = std::max_element(mainLogitsPtr, mainLogitsPtr + n_vocab) - mainLogitsPtr; + int topidx2 = std::max_element(guidanceLogitsPtr, guidanceLogitsPtr + n_vocab) - guidanceLogitsPtr; + printf("\nMain: (id:%d val:%f data:%s) Guided: (id:%d val:%f data:%s)\n", topidx1, mainLogitsPtr[topidx1], + FileFormatTokenizeID(topidx1, file_format, true).c_str(), topidx2, guidanceLogitsPtr[topidx2], + FileFormatTokenizeID(topidx2, file_format, true).c_str()); + } + for (int i = 0; i < n_vocab; ++i) { float logit_guidance = guidanceLogitsPtr[i]; float logit_main = mainLogitsPtr[i]; @@ -3449,7 +3458,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) guidance_embd.insert(guidance_embd.begin(), negprompt_tokens.begin(), negprompt_tokens.end()); //eval the guidance prompt - printf("Preparing Negative Prompt (%zu tokens)\n", guidance_embd.size()); + printf("\nPreparing Negative Prompt (%zu tokens)", guidance_embd.size()); kcpp_embd_batch batch = kcpp_embd_batch(guidance_embd, 0, use_mrope, false); auto er = llama_decode(guidance_ctx, batch.batch); if(er!=0) @@ -3653,8 +3662,12 @@ generation_outputs gpttype_generate(const generation_inputs inputs) if(guidance_ctx && negprompt_tokens.size()>0 && inputs.guidance_scale!=1.0f && embd.size()==1 && startedsampling) { //eval for negative prompt - kcpp_embd_batch batch = kcpp_embd_batch(embd, guidance_n_past, use_mrope, false); - evalres = (evalres && (llama_decode(guidance_ctx, batch.batch)==0)); + kcpp_embd_batch gbatch = kcpp_embd_batch(embd, guidance_n_past, use_mrope, false); + auto er = llama_decode(guidance_ctx, gbatch.batch); + if(er!=0) + { + printf("\nGenerate with Negative Prompt Failed! (code:%d)\n",er); + } guidance_n_past += 1; } if(embd.size()!=1 || draft_ctx==nullptr || remaining_tokens<=speculative_chunk_amt || grammar!=nullptr || startedsampling==false) //for large batch, or if no draft model, PP/TG as usual diff --git a/koboldcpp.py b/koboldcpp.py index 9b41fb65a..40e9bbe39 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -3681,7 +3681,10 @@ def RunServerMultiThreaded(addr, port, server_handler): global exitcounter exitcounter = 999 for i in range(numThreads): - threadArr[i].stop() + try: + threadArr[i].stop() + except Exception: + continue sys.exit(0) # Based on https://github.com/mathgeniuszach/xdialog/blob/main/xdialog/zenity_dialogs.py - MIT license | - Expanded version by Henk717 @@ -6723,7 +6726,7 @@ if __name__ == '__main__': compatgroup.add_argument("--usevulkan", help="Use Vulkan for GPU Acceleration. Can optionally specify one or more GPU Device ID (e.g. --usevulkan 0), leave blank to autodetect.", metavar=('[Device IDs]'), nargs='*', type=int, default=None) compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2) compatgroup.add_argument("--usecpu", help="Do not use any GPU acceleration (CPU Only)", action='store_true') - parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 4096). Supported values are [256,512,1024,2048,3072,4096,6144,8192,10240,12288,14336,16384,20480,24576,28672,32768,40960,49152,57344,65536,81920,98304,114688,131072]. IF YOU USE ANYTHING ELSE YOU ARE ON YOUR OWN.",metavar=('[256,512,1024,2048,3072,4096,6144,8192,10240,12288,14336,16384,20480,24576,28672,32768,40960,49152,57344,65536,81920,98304,114688,131072]'), type=check_range(int,256,262144), default=4096) + parser.add_argument("--contextsize", help="Controls the memory allocated for maximum context size, only change if you need more RAM for big contexts. (default 4096).",metavar=('[256 to 262144]'), type=check_range(int,256,262144), default=4096) parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU. Set to -1 to try autodetect, set to 0 to disable GPU offload.",metavar=('[GPU layers]'), nargs='?', const=1, type=int, default=-1) parser.add_argument("--tensor_split", help="For CUDA and Vulkan only, ratio to split tensors across multiple GPUs, space-separated list of proportions, e.g. 7 3", metavar=('[Ratios]'), type=float, nargs='+') diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat index aecb50594..0f05f6348 100644 --- a/make_pyinstaller.bat +++ b/make_pyinstaller.bat @@ -1,2 +1,2 @@ call create_ver_file.bat -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./simpleclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./json_to_gbnf.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_nocuda.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./simpleclinfo.exe;." --add-data "./aria2c-win.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./json_to_gbnf.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_nocuda.exe" \ No newline at end of file diff --git a/make_pyinstaller_cuda.bat b/make_pyinstaller_cuda.bat index d5f8fd5ea..69d931b1e 100644 --- a/make_pyinstaller_cuda.bat +++ b/make_pyinstaller_cuda.bat @@ -1,2 +1,2 @@ call create_ver_file.bat -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./simpleclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./json_to_gbnf.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./simpleclinfo.exe;." --add-data "./aria2c-win.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./json_to_gbnf.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file diff --git a/make_pyinstaller_cuda12.bat b/make_pyinstaller_cuda12.bat index 34a05ed35..1e32c5928 100644 --- a/make_pyinstaller_cuda12.bat +++ b/make_pyinstaller_cuda12.bat @@ -1,2 +1,2 @@ call create_ver_file.bat -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./simpleclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./json_to_gbnf.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_cu12.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./simpleclinfo.exe;." --add-data "./aria2c-win.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./json_to_gbnf.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_cu12.exe" \ No newline at end of file diff --git a/make_pyinstaller_cuda_oldcpu.bat b/make_pyinstaller_cuda_oldcpu.bat index 5e739c240..6e16d0d9c 100644 --- a/make_pyinstaller_cuda_oldcpu.bat +++ b/make_pyinstaller_cuda_oldcpu.bat @@ -1,2 +1,2 @@ call create_ver_file.bat -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./simpleclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./json_to_gbnf.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_oldcpu.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./simpleclinfo.exe;." --add-data "./aria2c-win.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./json_to_gbnf.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_oldcpu.exe" \ No newline at end of file