# Docker Compose service for KoboldCpp with Nvidia GPU reservation.
# NOTE(review): bumped from "3.2" — the `device_ids` field under
# deploy.resources requires Compose file format 3.8+; schema 3.2 rejects it.
version: "3.8"

services:
  koboldcpp:
    container_name: koboldcpp
    image: koboldai/koboldcpp:latest
    security_opt:
      # AppArmor breaks sockets; don't worry, everything is still securely
      # confined within Docker.
      - apparmor:unconfined
    volumes:
      # Persistent workspace (models, config) on the host.
      - ./kcpp-data:/workspace/:rw
    # Uncomment this section to add support for AMD/Intel GPUs.
    # devices:
    #   - /dev/dri:/dev/dri
    # You can remove this `deploy` section if you do not wish to use an
    # Nvidia GPU.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ['0']
              capabilities: [gpu]
    environment:
      - KCPP_DONT_UPDATE=false
      - KCPP_DONT_TUNNEL=true
      # For split models in the 00001-of format, specify the first file as
      # the --model. Remove "--gpulayers 99 --multiuser 20" if you do not
      # have a GPU. Don't forget to change the model for production: the
      # default model was chosen to fit on many systems rather than for its
      # quality.
      - KCPP_ARGS=--model https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_S.gguf?download=true --gpulayers 99 --multiuser 20 --admin --admindir . --adminpassword ChangeMe
    ports:
      # Port mapping quoted to avoid YAML 1.1 sexagesimal parsing.
      - "5001:5001"
    restart: unless-stopped