mirror of
https://github.com/lfnovo/open-notebook.git
synced 2026-04-28 03:19:59 +00:00
In air-gapped / offline Docker deployments, tiktoken.get_encoding() tries
to download the encoding file from openaipublic.blob.core.windows.net.
When that request fails it raises a URLError / OSError — not an ImportError
— so the previous except clause silently missed it and the crash surfaced in
the UI.
Widened `except ImportError` to `except Exception` so all failures —
"not installed" and "network unreachable" — fall through to the word-count
fallback (words × 1.3). Added a loguru WARNING so operators can see when
the fallback is active.
TIKTOKEN_CACHE_DIR now reads from the environment with a blank-safe
fallback (`or` guard prevents os.makedirs("") on empty env var). This lets
Docker images redirect the cache to a path outside /app/data/ so user-data
volume mounts cannot shadow the pre-baked encoding.
Both images now pre-download the o200k_base encoding during the builder
stage (internet is available at build time) and copy it into the runtime
image at /app/tiktoken-cache. ENV TIKTOKEN_CACHE_DIR=/app/tiktoken-cache
is set in the runtime stage so no network call is ever needed at runtime.
Added test_token_count_network_error_fallback in tests/test_utils.py:
patches tiktoken.get_encoding with a URLError and asserts token_count()
returns a positive int instead of raising.
Fixes #264
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
126 lines
4.4 KiB
Docker
126 lines
4.4 KiB
Docker
# ---- Stage 1: builder ----
FROM python:3.12-slim-bookworm AS builder

# Bring the uv and uvx binaries in from the upstream uv image.
# NOTE(review): the :latest tag is not reproducible; consider pinning a version.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
|
|
|
|
# System packages needed in the builder stage:
#   - build-essential (gcc/g++/make): required by some Python dependencies
#   - curl: downloads the NodeSource setup script
#   - nodejs 20.x LTS (NodeSource repository): builds the frontend
#
# The setup script is saved to a file and then executed instead of being piped
# straight into bash. Shell-form RUN uses `/bin/sh -c` without pipefail, so with
# `curl ... | bash -` a failed download would be hidden behind bash's exit
# status and the build would carry on without the NodeSource repository.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh \
    && bash /tmp/nodesource_setup.sh \
    && rm -f /tmp/nodesource_setup.sh \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Build-time environment for the builder stage.
# MAKEFLAGS is deliberately not set with ENV: ENV only substitutes $var/${var}
# references and never performs command substitution, so
# ENV MAKEFLAGS="-j$(nproc)" stores the literal text "-j$(nproc)".
# It is set inline on the RUN below instead, where the shell evaluates $(nproc).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy

# Set the working directory in the container to /app
WORKDIR /app

# Copy only the dependency manifests and the package's __init__.py first, so the
# dependency install below stays cached until one of these files changes.
COPY pyproject.toml uv.lock ./
COPY open_notebook/__init__.py ./open_notebook/__init__.py

# Install the locked, non-dev dependencies into /app/.venv.
# MAKEFLAGS is scoped to this command so any native builds can use all cores.
RUN MAKEFLAGS="-j$(nproc)" uv sync --frozen --no-dev
|
|
|
|
# Bake the tiktoken o200k_base encoding into the image so the app keeps working
# without network access (issue #264).
# The cache directory is /app/tiktoken-cache rather than a path under /app/data/,
# so a volume mounted over /app/data for user-data persistence cannot hide it.
# config.py reads TIKTOKEN_CACHE_DIR from the environment to find this path.
ENV TIKTOKEN_CACHE_DIR=/app/tiktoken-cache
RUN mkdir -p /app/tiktoken-cache \
    && .venv/bin/python -c "import tiktoken; tiktoken.get_encoding('o200k_base')"
|
|
|
|
# Copy the rest of the application code into the builder.
# NOTE(review): because this COPY comes before the frontend manifest copy below,
# any source change also invalidates the `npm ci` layer. Confirm whether the
# builder needs the whole tree before the frontend build.
COPY . /app

# Frontend: install the locked dependencies, then build.
WORKDIR /app/frontend

# The npm registry can be overridden at build time with --build-arg NPM_REGISTRY=...
ARG NPM_REGISTRY=https://registry.npmjs.org/
COPY frontend/package.json frontend/package-lock.json ./

# The expansion is quoted so the value is always passed to npm as a single
# argument and is never word-split or glob-expanded by the shell.
RUN npm config set registry "${NPM_REGISTRY}"
RUN npm ci
COPY frontend/ ./
RUN npm run build

# Return to app root
WORKDIR /app
|
|
|
|
# ---- Stage 2: runtime ----
FROM python:3.12-slim-bookworm AS runtime

# Runtime-only system packages (no build tools):
#   - curl, ffmpeg, supervisor
#   - nodejs 20.x LTS (NodeSource repository) for running the frontend
#
# The NodeSource setup script is saved to a file and then executed instead of
# being piped into bash. Shell-form RUN uses `/bin/sh -c` without pipefail, so
# with `curl ... | bash -` a failed download would be hidden behind bash's exit
# status and the build would carry on without the NodeSource repository.
# nodejs is installed with --no-install-recommends to keep the final image small.
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        supervisor \
    && curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh \
    && bash /tmp/nodesource_setup.sh \
    && rm -f /tmp/nodesource_setup.sh \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# uv and uvx binaries, taken from the upstream uv image
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Everything belonging to the application lives under /app
WORKDIR /app

# Reuse the virtual environment that was populated in the builder stage
COPY --from=builder /app/.venv /app/.venv

# Add the application source from the build context
COPY . /app
|
|
|
|
# Pre-downloaded tiktoken encoding from the builder stage. It sits outside
# /app/data, so mounting a volume there cannot hide it.
COPY --from=builder /app/tiktoken-cache /app/tiktoken-cache

# Runtime environment:
#   UV_NO_SYNC, VIRTUAL_ENV - make uv use the existing venv without attempting
#                             network operations
#   TIKTOKEN_CACHE_DIR      - points the app at the pre-baked tiktoken encoding
#                             (see open_notebook/config.py)
#   HOSTNAME                - binds Next.js to all interfaces (required for
#                             Docker networking and reverse proxies)
ENV UV_NO_SYNC=1 \
    VIRTUAL_ENV=/app/.venv \
    TIKTOKEN_CACHE_DIR=/app/tiktoken-cache \
    HOSTNAME=0.0.0.0
|
|
|
|
# Frontend files taken from the builder stage: the .next/standalone output,
# the .next/static assets, the public directory and the start-server.js script.
COPY --from=builder /app/frontend/.next/standalone /app/frontend/
COPY --from=builder /app/frontend/.next/static     /app/frontend/.next/static
COPY --from=builder /app/frontend/public           /app/frontend/public
COPY --from=builder /app/frontend/start-server.js  /app/frontend/start-server.js
|
|
|
|
# Ports used by the Frontend and the API
EXPOSE 8502 5055

# wait-for-api helper script and supervisord configuration
COPY scripts/wait-for-api.sh /app/scripts/wait-for-api.sh
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Create the data and supervisor log directories, and make the helper executable
RUN mkdir -p /app/data /var/log/supervisor \
    && chmod +x /app/scripts/wait-for-api.sh
|
|
|
|
# Runtime API URL configuration
# -----------------------------
# Set the API_URL environment variable when starting the container to tell the
# frontend where to reach the API. The same image can then be used in different
# deployment scenarios without rebuilding.
#
# When API_URL is not set, the system auto-detects it from incoming requests.
# Set it explicitly when running behind a reverse proxy or on a custom domain.
#
# Example: docker run -e API_URL=https://your-domain.com/api ...

# Start supervisord with the configuration copied into the image
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|