up ver

commit 23caa63f94 (parent 52cc0daa1b)
2 changed files with 3 additions and 3 deletions
README.md
@@ -40,7 +40,7 @@ After running this command you can launch Koboldcpp from the current directory u
 ## Obtaining a GGUF model
 - KoboldCpp uses GGUF models. They are not included here, but you can download GGUF files from other places such as [TheBloke's Huggingface](https://huggingface.co/TheBloke). Search for "GGUF" on huggingface.co for plenty of compatible models in the `.gguf` format.
 - For beginners, we recommend the models [Airoboros Mistral](https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf) or [Tiefighter 13B](https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf) (larger model).
-- [Alternatively, you can download the tools to convert models to the GGUF format yourself here](https://github.com/LostRuins/koboldcpp/releases/download/v1.69.1/koboldcpp_tools_6jul.zip). Run `convert-hf-to-gguf.py` to convert them, then `quantize_gguf.exe` to quantize the result.
+- [Alternatively, you can download the tools to convert models to the GGUF format yourself here](https://kcpptools.concedo.workers.dev). Run `convert-hf-to-gguf.py` to convert them, then `quantize_gguf.exe` to quantize the result.

 ## Improving Performance
 - **GPU Acceleration**: If you're on Windows with an Nvidia GPU you can get CUDA support out of the box using the `--usecublas` flag (Nvidia Only), or `--usevulkan` (Any GPU), make sure you select the correct .exe with CUDA support.
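The changed bullet above describes a two-step conversion flow (run `convert-hf-to-gguf.py`, then `quantize_gguf.exe`), and the **GPU Acceleration** bullet mentions launching with `--usecublas`. Below is a rough end-to-end sketch of that flow, driven from Python only to keep it self-contained; the file names, the quant type, and the exact arguments accepted by the conversion, quantize, and launcher executables are assumptions for illustration, not their documented interfaces.

```python
# Hypothetical end-to-end sketch: convert -> quantize -> launch.
# Paths, arguments, and quant type are illustrative assumptions; check each
# tool's --help for its real options.
import subprocess

hf_model_dir = "my-hf-model"             # hypothetical Hugging Face checkpoint folder
f16_gguf = "my-model-f16.gguf"           # intermediate full-precision GGUF
quantized_gguf = "my-model.Q4_K_S.gguf"  # final quantized model

# Step 1: convert the Hugging Face checkpoint to GGUF.
subprocess.run(["python", "convert-hf-to-gguf.py", hf_model_dir,
                "--outfile", f16_gguf], check=True)

# Step 2: quantize the converted file (Q4_K_S chosen as an example).
subprocess.run(["quantize_gguf.exe", f16_gguf, quantized_gguf, "Q4_K_S"],
               check=True)

# Step 3: launch KoboldCpp with CUDA acceleration (CUDA-enabled .exe assumed).
subprocess.run(["koboldcpp.exe", "--usecublas", "--model", quantized_gguf],
               check=True)
```

In practice these commands would normally be run directly from a shell; the `subprocess` wrapper is only there to make the example runnable as-is.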
koboldcpp.py
@@ -41,7 +41,7 @@ maxhordelen = 400
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.72"
+KcppVersion = "1.73"
 showdebug = True
 guimode = False
 showsamplerwarning = True
@@ -2056,7 +2056,7 @@ def RunServerMultiThreaded(addr, port):
     if ipv6_sock:
         ipv6_sock = context.wrap_socket(ipv6_sock, server_side=True)

-    numThreads = 22
+    numThreads = 24
     ipv4_sock.bind((addr, port))
     ipv4_sock.listen(numThreads)
     if ipv6_sock:
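For context on the last changed line: in `RunServerMultiThreaded`, `numThreads` is used as the `listen()` backlog and, in koboldcpp, also sizes the pool of threads serving the shared listening socket, so raising it from 22 to 24 allows two more concurrent handlers. Below is a minimal sketch of that accept-per-worker idiom, assuming a trivial echo handler and leaving out the IPv6 and TLS branches visible in the hunk; it illustrates the pattern and is not the project's actual implementation.

```python
# Minimal sketch: one listening socket shared by a fixed pool of worker
# threads, with the same value sizing both the backlog and the pool.
import socket
import threading

def run_server_multithreaded(addr: str, port: int, num_threads: int = 24) -> None:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind((addr, port))
    sock.listen(num_threads)  # backlog sized to match the worker pool

    def worker() -> None:
        while True:
            conn, _peer = sock.accept()  # each worker blocks on accept()
            with conn:
                data = conn.recv(4096)   # placeholder handler: echo one read
                conn.sendall(data)

    threads = [threading.Thread(target=worker, daemon=True)
               for _ in range(num_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()  # keep the main thread alive while workers run

if __name__ == "__main__":
    run_server_multithreaded("0.0.0.0", 5001)  # 5001 matches defaultport above
```

Each worker blocks in `accept()` on the same socket, and the OS hands every incoming connection to exactly one waiting thread, so no extra locking is needed around the accept loop.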