koboldcpp/colab.ipynb

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "view-in-github"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/LostRuins/koboldcpp/blob/concedo/colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2FCn5tmpn3UV"
      },
      "source": [
        "## Welcome to the Official KoboldCpp Colab Notebook\n",
        "It's really easy to get started. Just press the two **Play** buttons below, and then connect to the **Cloudflare URL** shown at the end.\n",
        "You can select a model from the dropdown, or enter a **custom URL** to a GGUF model (Example: `https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_M.gguf`)\n",
        "\n",
        "**Keep this page open and occationally check for captcha's so that your AI is not shut down**"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "QNaj3u0jn3UW"
      },
      "outputs": [],
      "source": [
        "#@title <-- Tap this if you play on Mobile { display-mode: \"form\" }\n",
        "%%html\n",
        "<b>Press play on the music player to keep the tab alive, then start KoboldCpp below</b><br/>\n",
        "<audio autoplay=\"\" src=\"https://raw.githubusercontent.com/KoboldAI/KoboldAI-Client/main/colab/silence.m4a\" loop controls>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "uJS9i_Dltv8Y"
      },
      "outputs": [],
      "source": [
        "#@title <b>v-- Enter your model below and then click this to start Koboldcpp</b>\r\n",
        "\r\n",
        "Model = \"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_M.gguf\" #@param [\"https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_M.gguf\",\"https://huggingface.co/Sao10K/Fimbulvetr-11B-v2-GGUF/resolve/main/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-13B-GGUF/resolve/main/mythomax-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/ReMM-SLERP-L2-13B-GGUF/resolve/main/remm-slerp-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Xwin-LM-13B-v0.2-GGUF/resolve/main/xwin-lm-13b-v0.2.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/Stheno-L2-13B-GGUF/resolve/main/stheno-l2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/MythoMax-L2-Kimiko-v2-13B-GGUF/resolve/main/mythomax-l2-kimiko-v2-13b.Q4_K_M.gguf\",\"https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf\",\"https://huggingface.co/afrideva/phi-2-uncensored-GGUF/resolve/main/phi-2-uncensored.q3_k_m.gguf\"]{allow-input: true}\r\n",
        "Layers = 99 #@param [99]{allow-input: true}\r\n",
        "ContextSize = 4096 #@param [4096] {allow-input: true}\r\n",
        "ForceRebuild = False #@param {type:\"boolean\"}\r\n",
        "LoadImageModel = False \r\n",
        "SDModel = \"\"\r\n",
        "SDCommand = \"\"\r\n",
        "\r\n",
        "import os\r\n",
        "if not os.path.isfile(\"/opt/bin/nvidia-smi\"):\r\n",
        "  raise RuntimeError(\"⚠️Colab did not give you a GPU due to usage limits, this can take a few hours before they let you back in. Check out https://lite.koboldai.net for a free alternative (that does not provide an API link but can load KoboldAI saves and chat cards) or subscribe to Colab Pro for immediate access.⚠️\")\r\n",
        "\r\n",
        "%cd /content\r\n",
        "!git clone https://github.com/LostRuins/koboldcpp\r\n",
        "%cd /content/koboldcpp\r\n",
        "kvers = !(cat koboldcpp.py | grep 'KcppVersion = ' | cut -d '\"' -f2)\r\n",
        "kvers = kvers[0]\r\n",
        "if ForceRebuild:\r\n",
        "  kvers = \"force_rebuild\"\r\n",
        "if SDModel and LoadImageModel:\r\n",
        "  SDCommand = \"--sdconfig sdmodel.safetensors clamped 4 quant\"\r\n",
        "else:\r\n",
        "  SDCommand = \"\"\r\n",
        "!echo Finding prebuilt binary for {kvers}\r\n",
        "!wget -O dlfile.tmp https://kcppcolab.concedo.workers.dev/?{kvers} && mv dlfile.tmp koboldcpp_cublas.so\r\n",
        "!test -f koboldcpp_cublas.so && echo Prebuilt Binary Exists || echo Prebuilt Binary Does Not Exist\r\n",
        "!test -f koboldcpp_cublas.so && echo Build Skipped || make koboldcpp_cublas LLAMA_CUBLAS=1 LLAMA_COLAB=1 LLAMA_PORTABLE=1\r\n",
        "!cp koboldcpp_cublas.so koboldcpp_cublas.dat\r\n",
        "!apt update\r\n",
        "!apt install aria2 -y\r\n",
        "!aria2c -x 10 -o model.gguf --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $Model\r\n",
        "if SDCommand:\r\n",
        "  !aria2c -x 10 -o sdmodel.safetensors --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $SDModel\r\n",
        "!python koboldcpp.py model.gguf --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --quiet --remotetunnel $SDCommand\r\n"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "cell_execution_strategy": "setup",
      "gpuType": "T4",
      "include_colab_link": true,
      "private_outputs": true,
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}