# Mirror of https://github.com/LostRuins/koboldcpp.git
# Synced 2026-05-17 04:09:19 +00:00
# docker-compose.yaml — 26 lines, 1.2 KiB, YAML, no trailing newline
version: "3.2"

services:
  koboldcpp:
    container_name: koboldcpp
    image: koboldai/koboldcpp:latest
    security_opt:
      # AppArmor breaks sockets, don't worry everything is securely confined within docker
      - apparmor:unconfined
    volumes:
      # Persistent data directory (models, config) mounted read-write into the container.
      - ./kcpp-data:/workspace/:rw
    # devices:  # Uncomment this section to add support for AMD/Intel GPU's
    #   - /dev/dri:/dev/dri
    deploy:  # You can remove this section if you do not wish to use an Nvidia GPU
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    environment:
      - KCPP_DONT_UPDATE=false
      - KCPP_DONT_TUNNEL=true
      # For split models in the 00001-of format specify the first file as the --model,
      # remove --gpulayers 99 --multiuser 20 if you do not have a GPU
      - KCPP_ARGS=--model https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_S.gguf?download=true --gpulayers 99 --multiuser 20 --admin --admindir . --adminpassword ChangeMe
      # Don't forget to change the model for production, the default model was chosen
      # to fit on many systems rather than for its quality.
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of colon-separated digits.
      - "5001:5001"
    restart: unless-stopped