From 4efe613f698cee13a845d616fb2cac206f6490a0 Mon Sep 17 00:00:00 2001
From: Artyom Mezin
Date: Sun, 19 Apr 2026 21:37:42 +0300
Subject: [PATCH] Make embedding batch size configurable (#742)

* Make embedding batch size configurable

* Address embedding batch size review nits
---
 CHANGELOG.md                                  |  3 +++
 docs/1-INSTALLATION/docker-compose.md         |  1 +
 docs/5-CONFIGURATION/environment-reference.md |  8 ++++++
 open_notebook/utils/embedding.py              | 25 ++++++++++++++++++++++++-
 4 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 84893d8..39d06ac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+- `OPEN_NOTEBOOK_EMBEDDING_BATCH_SIZE` environment variable to override the embedding batch size; default remains `50`. Helps with CPU-only local embedding and stricter OpenAI-compatible endpoints (#735)
+
 ## [1.8.5] - 2026-04-14
 
 ### Changed
diff --git a/docs/1-INSTALLATION/docker-compose.md b/docs/1-INSTALLATION/docker-compose.md
index dd45dc5..8a97499 100644
--- a/docs/1-INSTALLATION/docker-compose.md
+++ b/docs/1-INSTALLATION/docker-compose.md
@@ -197,6 +197,7 @@ Configure Ollama in the Settings UI:
 | `SURREAL_NAMESPACE` | Database namespace | `open_notebook` |
 | `SURREAL_DATABASE` | Database name | `open_notebook` |
 | `API_URL` | API external URL | `http://localhost:5055` |
+| `OPEN_NOTEBOOK_EMBEDDING_BATCH_SIZE` | Override embedding batch size for stricter/local providers (recommended: `8` for CPU-only local setups) | `50` |
 
 See [Environment Reference](../5-CONFIGURATION/environment-reference.md) for complete list.
 
diff --git a/docs/5-CONFIGURATION/environment-reference.md b/docs/5-CONFIGURATION/environment-reference.md
index ca50944..e650710 100644
--- a/docs/5-CONFIGURATION/environment-reference.md
+++ b/docs/5-CONFIGURATION/environment-reference.md
@@ -61,6 +61,14 @@ Comprehensive list of all environment variables available in Open Notebook.
 
 ---
 
+## Embeddings
+
+| Variable | Required? | Default | Description |
+|----------|-----------|---------|-------------|
+| `OPEN_NOTEBOOK_EMBEDDING_BATCH_SIZE` | No | 50 | Number of texts sent per embedding batch. Lower this for CPU-only or stricter OpenAI-compatible embedding providers. |
+
+---
+
 ## Text-to-Speech (TTS)
 
 | Variable | Required? | Default | Description |
diff --git a/open_notebook/utils/embedding.py b/open_notebook/utils/embedding.py
index af2035b..713e870 100644
--- a/open_notebook/utils/embedding.py
+++ b/open_notebook/utils/embedding.py
@@ -11,6 +11,7 @@ to ensure consistent behavior and proper handling of large content.
 """
 
 import asyncio
+import os
 from typing import TYPE_CHECKING, List, Optional
 
 import numpy as np
@@ -19,7 +20,29 @@ from loguru import logger
 from .chunking import CHUNK_SIZE, ContentType, chunk_text
 from .token_utils import token_count
 
-EMBEDDING_BATCH_SIZE = 50
+
+def _get_embedding_batch_size() -> int:
+    """
+    Read the embedding batch size from the environment.
+
+    This is intentionally configurable because provider limits vary widely, and
+    CPU-only local embedding endpoints often need smaller batches than cloud APIs.
+    """
+    raw = os.getenv("OPEN_NOTEBOOK_EMBEDDING_BATCH_SIZE", "50").strip()
+    try:
+        value = int(raw)  # non-integer text raises ValueError
+        if value < 1:
+            raise ValueError  # zero/negative sizes share the fallback path below
+        return value
+    except ValueError:
+        logger.warning(
+            "Invalid OPEN_NOTEBOOK_EMBEDDING_BATCH_SIZE='{}'; falling back to 50",
+            raw,
+        )
+        return 50  # same value as the default used when the variable is unset
+
+
+EMBEDDING_BATCH_SIZE = _get_embedding_batch_size()  # resolved once, at import time
 EMBEDDING_MAX_RETRIES = 3
 EMBEDDING_RETRY_DELAY = 2  # seconds
 