mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-17 12:39:09 +00:00
347 lines
17 KiB
Bash
347 lines
17 KiB
Bash
|
|
#!/bin/bash
# KoboldCpp Docker entrypoint: prepares the workspace, optionally sets up SSH
# and a dev environment, downloads the requested models and launches KoboldCpp.

# extglob is needed later for `rm !(koboldcpp)` when KCPP_REMOVE_FILES=true.
shopt -s extglob

# Default workspace; RunPod mounts its persistent volume at /runpod-volume.
WORKSPACE=/workspace

if [ -d /runpod-volume ]; then
    WORKSPACE=/runpod-volume
fi

# mkdir -p is idempotent and creates parents, so no existence check or
# separate parent mkdir is required.
mkdir -p "$WORKSPACE/$KCPP_SUBFOLDER"
# Abort if we cannot enter the workspace: everything below (downloads and the
# optional KCPP_REMOVE_FILES cleanup) assumes we are inside it.
cd "$WORKSPACE/$KCPP_SUBFOLDER" || exit 1
|
|
|
|
# No model requested through any downloader variable? (Original code tested
# KCPP_TTSMODEL twice; the duplicate has been removed.)
if [[ -z "$KCPP_MODEL" && -z "$KCPP_IMGMODEL" && -z "$KCPP_WHISPERMODEL" && -z "$KCPP_TTSMODEL" && -z "$KCPP_EMBEDMODEL" ]]; then
    # ...and no custom arguments either: print the welcome/help text, then
    # fall back to a tiny demo model so the container still starts.
    if [[ -z "$KCPP_ARGS" ]]; then
        if [[ $KCPP_DONT_TUNNEL != "true" ]]; then
            echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            echo "!! This docker will setup a cloudflare tunnel by default as it was designed for GPU rental services. !!"
            echo "!! Use the KCPP_DONT_TUNNEL=true environment variable if you do not wish this to happen. !!"
            echo "!! For example: docker run --rm -e KCPP_DONT_TUNNEL=true -p 5001:5001 -it koboldai/koboldcpp !!"
            echo "!! The docker compose example mentioned below has optimized defaults for local usage and does not do this. !!"
            echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
        fi
        echo
        echo "Welcome to the official KoboldCpp Docker!"
        echo
        echo "To use KoboldCpp in a docker you must define environment variables."
        echo "Our built in Model Downloader can be used with KCPP_MODEL, KCPP_IMGMODEL, KCPP_MMPROJ, KCPP_EMBEDMODEL, KCPP_TTSMODEL and KCPP_WHISPERMODEL"
        echo "Additional arguments can be specified with KCPP_ARGS, for example for GPU usage: --usecuda mmq --gpulayers 99 --multiuser 20"
        echo "KoboldCpp runs on port 5001 by default, make sure to port forward in docker if you wish to run on your local network."
        echo "For a full list of arguments use --help as the KCPP_ARGS argument."
        echo "Mounting your own models locally instead? Use the --model arg instead of our KCPP_MODEL environment variable"
        echo "You can also mount a volume to /workspace to persist the KCPP_MODEL across restarts"
        echo
        echo "Need an example for Docker Compose? Run: docker run --rm -it koboldai/koboldcpp compose-example"
        echo "Optionally use the following to extract docker-compose.yml to the current directory : docker run --rm -v .:/workspace -it koboldai/koboldcpp compose-example"
        echo
        echo "Questions? Reach out to us on https://koboldai.org/discord for one on one support"
        echo
        echo "Launching KoboldCpp with a tiny demo model in 1 minute, you can test functionality but expect weak logic."
        # Give the user a chance to read the message before the demo starts.
        sleep 60
        KCPP_MODEL=https://huggingface.co/unsloth/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it-Q4_1.gguf?download=true
    fi
fi
|
|
|
|
# Setup ssh
# When PUBLIC_KEY is provided (typical for GPU rental services), install and
# start an SSH server so the user can log into the container.
if [[ $PUBLIC_KEY ]]; then
    echo "Setting up SSH..."
    apt-get update
    DEBIAN_FRONTEND=noninteractive apt-get install openssh-server -y
    mkdir -p ~/.ssh
    echo "$PUBLIC_KEY" >> ~/.ssh/authorized_keys
    chmod 700 -R ~/.ssh

    # Generate any missing host keys, one per supported algorithm.
    # (Previously four near-identical copy-pasted stanzas.)
    for keytype in rsa dsa ecdsa ed25519; do
        if [ ! -f "/etc/ssh/ssh_host_${keytype}_key" ]; then
            ssh-keygen -t "$keytype" -f "/etc/ssh/ssh_host_${keytype}_key" -q -N ''
            echo "${keytype^^} key fingerprint:"
            ssh-keygen -lf "/etc/ssh/ssh_host_${keytype}_key.pub"
        fi
    done

    service ssh start

    echo "SSH host keys:"
    for key in /etc/ssh/*.pub; do
        echo "Key: $key"
        ssh-keygen -lf "$key"
    done
fi
|
|
|
|
#cat /proc/cpuinfo

export SSL_CERT_DIR=/etc/ssl/certs

# Developer mode: install a browser-based VSCode (code-server) and turn on
# the GLANCES monitor as well.
if [[ "$DEVMODE" = "true" ]]; then
    export GLANCES=true
    curl -fsSL https://code-server.dev/install.sh | sh
    # NOTE(review): binds code-server to port 3 — unusually low port number,
    # confirm this is intended rather than a truncated port like 3000.
    code-server --bind-addr 0.0.0.0 --port 3 &
fi

# Web-based system monitor, runs in the background.
if [[ "$GLANCES" = "true" ]]; then
    glances -w --enable-mcp &
fi
|
|
|
|
# In devmode, replace the power-management commands with shims that write a
# flag file (/rr). The loop below picks the flag up and exits the container
# with a matching code: 1 = reboot requested, 0 = shutdown requested.
if [[ $DEVMODE == "true" ]]; then
    # -f so a missing original does not print an error.
    rm -f /usr/sbin/reboot /usr/sbin/poweroff /usr/sbin/halt /usr/sbin/shutdown
    echo "echo 1 > /rr && echo Attempting Reboot..." > /usr/sbin/reboot
    echo "echo 0 > /rr && echo Attempting Shutdown..." > /usr/sbin/poweroff
    echo "echo 0 > /rr && echo Attempting Shutdown..." > /usr/sbin/halt
    echo "echo Please use the poweroff or reboot" > /usr/sbin/shutdown
    chmod +x /usr/sbin/reboot /usr/sbin/poweroff /usr/sbin/halt /usr/sbin/shutdown

    # Wait for a signal to restart
    while true; do
        if [ -f /rr ]; then
            echo "Received exit request by user"
            rebootflag=$(cat /rr)
            # Remove the restart flag
            rm -f /rr
            exit "$rebootflag"
        fi
        sleep 10
    done
fi
|
|
|
|
if [ -n "$KCPP_DONT_REMOVE_MODELS" ]; then
|
|
echo "KCPP_DONT_REMOVE_MODELS has been removed and will be ignored, we only delete all files in the mounted workspace if KCPP_REMOVE_FILES is set to true."
|
|
fi
|
|
|
|
if [[ $KCPP_REMOVE_FILES == "true" ]]; then
|
|
echo "REMOVING EVERYTHING THATS NOT KOBOLDCPP"
|
|
rm !(koboldcpp)
|
|
rm -rf splitmodel/
|
|
fi
|
|
|
|
# Assemble the final command line, starting from the user supplied arguments.
KCPP_CMDLINE="$KCPP_ARGS"

# Defaults baked into the docker image, if any. ($() instead of legacy backticks.)
if [ -f "/opt/koboldcpp/default.args" ]; then
    KCPP_CMDLINE="$KCPP_CMDLINE $(cat /opt/koboldcpp/default.args)"
fi

# Tunnel by default (designed for GPU rental services);
# KCPP_DONT_TUNNEL=true opts out.
if [[ $KCPP_DONT_TUNNEL != "true" ]]; then
    KCPP_CMDLINE="$KCPP_CMDLINE --remotetunnel"
fi
|
|
|
|
# Select which KoboldCpp to run:
# - KCPP_GIT set: clone the repo and run via its conda script.
# - A bundled ./koboldcpp.py exists: use the bundled python copy.
# - Otherwise: pick a prebuilt binary URL matching the detected hardware.
if [[ -n "$KCPP_GIT" ]]; then
    apt update && apt install git curl bzip2 -y
    git clone --recurse-submodules "$KCPP_GIT" koboldcpp
    cd koboldcpp
    KCPP_BIN=condascript
else
    if [ -f "./koboldcpp.py" ]; then
        echo "A bundled KoboldCpp was detected, we will be using the bundled copy."
        KCPP_BIN=python
    else
        # Default: CUDA 12 binary; downgraded below if the hardware disagrees.
        KCPP_BIN=https://koboldai.org/cpplinuxcu12

        if [ ! -f "/usr/bin/nvidia-smi" ]; then
            # No NVIDIA driver. A /dev/dri node hints at an AMD GPU (ROCm),
            # unless the user explicitly asked for Vulkan in KCPP_ARGS.
            if [[ -e "/dev/dri" && "${KCPP_ARGS:-}" != *"--usevulkan"* ]]; then
                echo "GPU might be AMD and Vulkan not explicitly requested, enabling ROCm support."
                KCPP_BIN=https://koboldai.org/cpplinuxrocm
                if ! grep -q avx2 "/proc/cpuinfo"; then
                    echo "Ancient CPU detected: $(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1')"
                    # Message fixed: this branch triggers on missing AVX2 (not AVX1).
                    echo "This CPU does not have AVX2 support, we will be using a slower AVX1 mode with Vulkan, compatibility and speed will be degraded."
                    echo "If your GPU is not Vulkan compatible do not pass it through to this docker."
                    KCPP_CMDLINE="$KCPP_CMDLINE --noavx2"
                    KCPP_BIN=https://koboldai.org/cpplinuxnocu
                    sleep 30
                fi
            else
                echo "NVIDIA/AMD have not been detected, generic KoboldCpp will be used."
                KCPP_BIN=https://koboldai.org/cpplinuxnocu
                if ! grep -q avx2 "/proc/cpuinfo"; then
                    echo "Ancient CPU detected: $(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1')"
                    echo "This CPU does not have AVX2 support, we will be using a much slower AVX1 mode, expect bad performance."
                    KCPP_CMDLINE="$KCPP_CMDLINE --noavx2"
                    sleep 30
                fi
            fi
        else
            # NVIDIA present: fall back to the AVX1+CUDA11 legacy binary when
            # the driver is old (CUDA 11 or 12.0) or the CPU lacks AVX2.
            if nvidia-smi | grep -q 'CUDA Version: 11'; then
                echo "WARNING: CUDA 11 detected, we will use a binary with AVX1 and CUDA11 for legacy compatibility. Performance is not representative of KoboldCpp's ability."
                echo "If your GPU supports a newer CUDA version it is highly recommended to update the drivers, if this is a cloud instance consider switching to an updated instance for maximum performance."
                KCPP_BIN=https://koboldai.org/cpplinux
                sleep 15
            fi
            if nvidia-smi | grep -q 'CUDA Version: 12.0'; then
                echo "WARNING: CUDA 12.0 detected, we will use a binary with AVX1 and CUDA11 for legacy compatibility. Performance is not representative of KoboldCpp's ability."
                echo "If your GPU supports a newer CUDA version it is highly recommended to update the drivers, if this is a cloud instance consider switching to an updated instance for maximum performance."
                KCPP_BIN=https://koboldai.org/cpplinux
                sleep 15
            fi
            if ! grep -q avx2 "/proc/cpuinfo"; then
                echo "Ancient CPU detected: $(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1')"
                echo "This CPU does not have AVX2 support, we will be using AVX1 and CUDA 11, expect worse performance especially when offloading layers."
                echo "If this is a cloud instance its recommended to switch to an instance with a modern CPU."
                KCPP_BIN=https://koboldai.org/cpplinux
                sleep 30
            fi
        fi
    fi
fi
|
|
|
|
# Manual binary override; the user takes responsibility for hardware compatibility.
if [[ "$KCPP_BIN_OVERRIDE" != "" ]]; then
    echo "KCPP Binary Override applied, you are responsible this binary is compatible with the hardware."
    KCPP_BIN="$KCPP_BIN_OVERRIDE"
fi
|
|
|
|
# Optional admin-config repo: clone it and expose it through KoboldCpp's
# admin mode so configs can be swapped at runtime.
if [[ -n "$KCPP_CONFIG_GIT" ]]; then
    apt update && apt install git -y
    # Quoted so a malformed URL cannot word-split into extra git arguments.
    git clone --recurse-submodules "$KCPP_CONFIG_GIT" configs
    KCPP_CMDLINE="$KCPP_CMDLINE --admin --admindir configs"
fi
|
|
|
|
# Historic convenience: pair the LLaMA2-13B model with its matching Llava
# mmproj. (Substring match, same effect as the original quoted =~ test.)
case "$KCPP_MODEL" in
*LLaMA2-13B*)
    echo "LLaMA2 13B detected, loading default Llava model."
    export KCPP_MMPROJ="https://huggingface.co/koboldcpp/mmproj/resolve/main/llama-13b-mmproj-v1.5.Q4_1.gguf?download=true"
    ;;
esac
|
|
|
|
# Main text model download. Supports huggingface "blob" URLs (rewritten to
# "resolve"), comma separated gguf-split part lists (downloaded side by side,
# first part loaded), and comma separated raw splits (byte-concatenated).
if [[ -n "$KCPP_MODEL" ]]; then
    KCPP_MODEL=${KCPP_MODEL/"blob"/"resolve"}
    if [[ $KCPP_MODEL =~ "," ]]; then
        if [[ $KCPP_MODEL =~ "00001-of-" ]]; then
            # gguf-split: keep every part under its own filename in splitmodel/.
            SPLIT_FIRST_FILE=${KCPP_MODEL%%,*}           # first URL of the list
            SPLIT_FIRST_FILE=${SPLIT_FIRST_FILE##*/}     # strip the path
            SPLIT_FIRST_FILE=${SPLIT_FIRST_FILE%%[?#]*}  # strip query/fragment
            echo "$SPLIT_FIRST_FILE is a gguf-split file make sure to append all split files with a comma"
            # Intentional unquoted expansion: split the list on commas-turned-spaces.
            for i in ${KCPP_MODEL//,/ }; do
                url_filename=${i##*/}
                url_filename=${url_filename%%[?#]*}
                # Quoted URL/filename so ? and # in URLs cannot glob or word-split.
                aria2c -x 16 -s 16 -o "$url_filename" -d splitmodel --summary-interval=5 --download-result=default --continue=true --file-allocation=none "${i/blob/resolve}"
            done
            KCPP_CMDLINE="$KCPP_CMDLINE --model splitmodel/$SPLIT_FIRST_FILE"
        else
            # Legacy raw splits: append each downloaded part onto model.gguf.
            for i in ${KCPP_MODEL//,/ }; do
                aria2c -x 16 -s 16 -o kcpp_append -d /tmp --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none "${i/blob/resolve}"
                echo "Appending split... Please wait."
                cat /tmp/kcpp_append >> ./model.gguf
                rm /tmp/kcpp_append
            done
            KCPP_CMDLINE="$KCPP_CMDLINE --model ./model.gguf"
        fi
    else
        # Single URL: let KoboldCpp's own downloader fetch it.
        KCPP_CMDLINE="$KCPP_CMDLINE --model $KCPP_MODEL"
    fi
fi
|
|
|
|
# Image generation model. Comma separated raw splits are byte-concatenated
# into imgmodel.gguf; a single URL is passed straight to KoboldCpp.
if [[ -n "$KCPP_IMGMODEL" ]]; then
    if [[ $KCPP_IMGMODEL =~ "," ]]; then
        for i in ${KCPP_IMGMODEL//,/ }; do
            # Quoted URL so ? and # in URLs cannot glob or word-split.
            aria2c -x 16 -s 16 -o kcpp_append -d /tmp --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none "${i/blob/resolve}"
            echo "Appending split... Please wait."
            cat /tmp/kcpp_append >> ./imgmodel.gguf
            rm /tmp/kcpp_append
        done
        KCPP_CMDLINE="$KCPP_CMDLINE --sdmodel ./imgmodel.gguf"
    else
        KCPP_CMDLINE="$KCPP_CMDLINE --sdmodel $KCPP_IMGMODEL"
    fi
fi
|
|
|
|
# Multimodal projector download, same split/append pattern as the other models.
if [[ -n "$KCPP_MMPROJ" ]]; then
    if [[ $KCPP_MMPROJ =~ "," ]]; then
        for i in ${KCPP_MMPROJ//,/ }; do
            # Quoted URL so ? and # in URLs cannot glob or word-split.
            aria2c -x 16 -s 16 -o kcpp_append -d /tmp --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none "${i/blob/resolve}"
            echo "Appending split... Please wait."
            cat /tmp/kcpp_append >> ./mmproj.gguf
            rm /tmp/kcpp_append
        done
        KCPP_CMDLINE="$KCPP_CMDLINE --mmproj ./mmproj.gguf"
    else
        KCPP_CMDLINE="$KCPP_CMDLINE --mmproj $KCPP_MMPROJ"
    fi
fi
|
|
|
|
# Embeddings model download, same split/append pattern as the other models.
if [[ -n "$KCPP_EMBEDMODEL" ]]; then
    if [[ $KCPP_EMBEDMODEL =~ "," ]]; then
        for i in ${KCPP_EMBEDMODEL//,/ }; do
            # Quoted URL so ? and # in URLs cannot glob or word-split.
            aria2c -x 16 -s 16 -o kcpp_append -d /tmp --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none "${i/blob/resolve}"
            echo "Appending split... Please wait."
            cat /tmp/kcpp_append >> ./embeddings.gguf
            rm /tmp/kcpp_append
        done
        KCPP_CMDLINE="$KCPP_CMDLINE --embeddingsmodel ./embeddings.gguf"
    else
        KCPP_CMDLINE="$KCPP_CMDLINE --embeddingsmodel $KCPP_EMBEDMODEL"
    fi
fi
|
|
|
|
# Whisper (speech-to-text) model download, same split/append pattern.
if [[ -n "$KCPP_WHISPERMODEL" ]]; then
    if [[ $KCPP_WHISPERMODEL =~ "," ]]; then
        for i in ${KCPP_WHISPERMODEL//,/ }; do
            # Quoted URL so ? and # in URLs cannot glob or word-split.
            aria2c -x 16 -s 16 -o kcpp_append -d /tmp --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none "${i/blob/resolve}"
            echo "Appending split... Please wait."
            cat /tmp/kcpp_append >> ./whisper.gguf
            rm /tmp/kcpp_append
        done
        KCPP_CMDLINE="$KCPP_CMDLINE --whispermodel ./whisper.gguf"
    else
        KCPP_CMDLINE="$KCPP_CMDLINE --whispermodel $KCPP_WHISPERMODEL"
    fi
fi
|
|
|
|
# Text-to-speech model download. The companion WavTokenizer model is always
# fetched alongside and both are passed on the command line together.
if [[ -n "$KCPP_TTSMODEL" ]]; then
    if [[ $KCPP_TTSMODEL =~ "," ]]; then
        # Comma separated raw splits: byte-concatenate into ttsmodel.gguf.
        for i in ${KCPP_TTSMODEL//,/ }; do
            # Quoted URL so ? and # in URLs cannot glob or word-split.
            aria2c -x 16 -s 16 -o kcpp_append -d /tmp --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none "${i/blob/resolve}"
            echo "Appending split... Please wait."
            cat /tmp/kcpp_append >> ./ttsmodel.gguf
            rm /tmp/kcpp_append
        done
    else
        aria2c -x 16 -s 16 -o ttsmodel.gguf --summary-interval=5 --download-result=default --continue=true --file-allocation=none "${KCPP_TTSMODEL/blob/resolve}"
    fi
    aria2c -x 16 -s 16 -o wavmodel.gguf --summary-interval=5 --download-result=default --continue=true --file-allocation=none https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf
    KCPP_CMDLINE="$KCPP_CMDLINE --ttsmodel ./ttsmodel.gguf --ttswavtokenizer ./wavmodel.gguf"
fi
|
|
|
|
# Launch KoboldCpp using whichever distribution was selected above.
if [[ $KCPP_BIN == "python" ]]; then
    bash -c "python3 koboldcpp.py --quiet $KCPP_CMDLINE"
elif [[ $KCPP_BIN == "condascript" ]]; then
    bash -c "./koboldcpp.sh --quiet $KCPP_CMDLINE"
else
    # KCPP_BIN is a download URL; refresh the binary unless updates are disabled.
    if [[ $KCPP_DONT_UPDATE == "true" ]] && [[ -f "./koboldcpp" ]]; then
        echo "Update check skipped"
    else
        aria2c -x 16 -s 16 -o koboldcpp --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none "$KCPP_BIN" && chmod +x ./koboldcpp
    fi
    # Dumb double bash workaround because otherwise user defined quotes don't work for some reason - Henk
    bash -c "./koboldcpp --quiet $KCPP_CMDLINE"
fi

# Reached only after KoboldCpp exits (normally it runs forever).
# NOTE(review): the message mentions stalling for 3 minutes but no sleep is
# visible in this chunk — the delay may live past the end of this view.
echo "Something possibly went wrong, stalling for 3 minutes before exiting so you can check for errors. (No error? You may have run out of memory. Try deleting the image generation model if you don't need it or use a larger GPU.)"
echo "Need some help? https://koboldai.org/discord for one on one support"
|