From 23caa63f94f4a1871ed4305ce5d85f16a9fa1767 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 4 Aug 2024 23:42:22 +0800
Subject: [PATCH] up ver

---
 README.md    | 2 +-
 koboldcpp.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 2c4a6e2c6..e418bcdbb 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@ After running this command you can launch Koboldcpp from the current directory u
 ## Obtaining a GGUF model
 - KoboldCpp uses GGUF models. They are not included here, but you can download GGUF files from other places such as [TheBloke's Huggingface](https://huggingface.co/TheBloke). Search for "GGUF" on huggingface.co for plenty of compatible models in the `.gguf` format.
 - For beginners, we recommend the models [Airoboros Mistral](https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf) or [Tiefighter 13B](https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf) (larger model).
-- [Alternatively, you can download the tools to convert models to the GGUF format yourself here](https://github.com/LostRuins/koboldcpp/releases/download/v1.69.1/koboldcpp_tools_6jul.zip). Run `convert-hf-to-gguf.py` to convert them, then `quantize_gguf.exe` to quantize the result.
+- [Alternatively, you can download the tools to convert models to the GGUF format yourself here](https://kcpptools.concedo.workers.dev). Run `convert-hf-to-gguf.py` to convert them, then `quantize_gguf.exe` to quantize the result.
 
 ## Improving Performance
 - **GPU Acceleration**: If you're on Windows with an Nvidia GPU you can get CUDA support out of the box using the `--usecublas` flag (Nvidia Only), or `--usevulkan` (Any GPU), make sure you select the correct .exe with CUDA support.
diff --git a/koboldcpp.py b/koboldcpp.py
index 972805386..224c08377 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -41,7 +41,7 @@ maxhordelen = 400
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.72"
+KcppVersion = "1.73"
 showdebug = True
 guimode = False
 showsamplerwarning = True
@@ -2056,7 +2056,7 @@ def RunServerMultiThreaded(addr, port):
         if ipv6_sock:
             ipv6_sock = context.wrap_socket(ipv6_sock, server_side=True)
 
-    numThreads = 22
+    numThreads = 24
     ipv4_sock.bind((addr, port))
     ipv4_sock.listen(numThreads)
     if ipv6_sock:
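
Note on the README hunk: the two-step workflow it describes (run `convert-hf-to-gguf.py`, then `quantize_gguf.exe`) can be scripted. The sketch below is illustrative only and not part of the patch; the `--outfile`/`--outtype` flags and the `Q4_K_S` quantization argument are assumptions based on the upstream llama.cpp conversion tools that the koboldcpp tools bundle repackages, so verify them against each tool's `--help` output before relying on this.

```python
# Hypothetical helper driving the two conversion steps from the README hunk.
# Tool names and flags are assumptions -- check the bundled tools' --help output.
import subprocess
from pathlib import Path

def convert_and_quantize(hf_model_dir: str, out_dir: str, quant_type: str = "Q4_K_S") -> Path:
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    f16_gguf = out / "model-f16.gguf"
    quant_gguf = out / f"model-{quant_type}.gguf"

    # Step 1: convert the Hugging Face checkpoint to an unquantized GGUF file.
    subprocess.run(
        ["python", "convert-hf-to-gguf.py", hf_model_dir,
         "--outfile", str(f16_gguf), "--outtype", "f16"],
        check=True)

    # Step 2: quantize the GGUF file down to the requested type.
    subprocess.run(
        ["quantize_gguf.exe", str(f16_gguf), str(quant_gguf), quant_type],
        check=True)
    return quant_gguf
```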
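Note on the second koboldcpp.py hunk: `numThreads` is passed to `listen()` as the backlog, and judging by the `RunServerMultiThreaded` name it plausibly also sizes a pool of worker threads accepting on the shared socket. Below is a minimal sketch of that general pattern under those assumptions; the handler body is a placeholder and does not reproduce koboldcpp's actual request handling.

```python
# Minimal sketch of a multi-threaded accept loop, assuming num_threads doubles as
# the listen() backlog and the worker-thread count (placeholder handler, not koboldcpp code).
import socket
import threading

def run_server_multithreaded(addr: str, port: int, num_threads: int = 24) -> None:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind((addr, port))
    sock.listen(num_threads)  # backlog sized to the worker count

    def worker() -> None:
        while True:
            conn, _ = sock.accept()  # each thread blocks on the shared listening socket
            with conn:
                conn.sendall(b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n")

    threads = [threading.Thread(target=worker, daemon=True) for _ in range(num_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
```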