mirror of
https://github.com/lfnovo/open-notebook.git
synced 2026-04-28 19:40:50 +00:00
feat: add cascade deletion for notebooks with delete preview (#471)
* feat: decrease chunking size for maximum ollama compatibility * docs: improve i18n info on Claude.md * feat: add cascade deletion for notebooks with delete preview - Add Notebook.get_delete_preview() to show counts of affected items - Add Notebook.delete(delete_exclusive_sources) for cascade deletion - Always delete notes when notebook is deleted - Allow user to choose: delete or keep exclusive sources - Shared sources are always unlinked but never deleted - Add NotebookDeleteDialog component with radio button options - Add delete-preview API endpoint - Update delete endpoint with delete_exclusive_sources param - Add i18n support for all 5 locales Closes #77 * docs: remove hardcoded config settings
This commit is contained in:
parent
f14020d385
commit
4e411e0488
19 changed files with 527 additions and 55 deletions
|
|
@ -39,8 +39,8 @@ Each utility is stateless and can be imported independently.
|
|||
|
||||
### chunking.py
|
||||
- **ContentType**: Enum (HTML, MARKDOWN, PLAIN)
|
||||
- **CHUNK_SIZE**: 1500 characters (constant)
|
||||
- **CHUNK_OVERLAP**: 225 characters (15% overlap)
|
||||
- **CHUNK_SIZE**: constant
|
||||
- **CHUNK_OVERLAP**: constant
|
||||
- **detect_content_type_from_extension(file_path)**: Detect type from file extension
|
||||
- **detect_content_type_from_heuristics(text)**: Detect type from content patterns (returns type + confidence)
|
||||
- **detect_content_type(text, file_path)**: Combined detection (extension primary, heuristics fallback)
|
||||
|
|
|
|||
|
|
@ -22,8 +22,8 @@ from langchain_text_splitters import (
|
|||
from loguru import logger
|
||||
|
||||
# Constants
|
||||
CHUNK_SIZE = 1500 # characters
|
||||
CHUNK_OVERLAP = 225 # 15% of chunk size
|
||||
CHUNK_SIZE = 1200 # characters
|
||||
CHUNK_OVERLAP = 180 # 15% of chunk size
|
||||
HIGH_CONFIDENCE_THRESHOLD = 0.8 # Threshold for heuristics to override extension
|
||||
|
||||
|
||||
|
|
@ -73,7 +73,9 @@ _EXTENSION_TO_CONTENT_TYPE = {
|
|||
}
|
||||
|
||||
|
||||
def detect_content_type_from_extension(file_path: Optional[str]) -> Optional[ContentType]:
|
||||
def detect_content_type_from_extension(
|
||||
file_path: Optional[str],
|
||||
) -> Optional[ContentType]:
|
||||
"""
|
||||
Detect content type from file extension.
|
||||
|
||||
|
|
@ -220,9 +222,7 @@ def _calculate_markdown_score(text: str) -> float:
|
|||
return min(score, 1.0)
|
||||
|
||||
|
||||
def detect_content_type(
|
||||
text: str, file_path: Optional[str] = None
|
||||
) -> ContentType:
|
||||
def detect_content_type(text: str, file_path: Optional[str] = None) -> ContentType:
|
||||
"""
|
||||
Detect content type using file extension (primary) and heuristics (fallback).
|
||||
|
||||
|
|
@ -352,12 +352,18 @@ def chunk_text(
|
|||
splitter = _get_html_splitter()
|
||||
# HTML splitter returns Document objects
|
||||
docs = splitter.split_text(text)
|
||||
chunks = [doc.page_content if hasattr(doc, "page_content") else str(doc) for doc in docs]
|
||||
chunks = [
|
||||
doc.page_content if hasattr(doc, "page_content") else str(doc)
|
||||
for doc in docs
|
||||
]
|
||||
elif content_type == ContentType.MARKDOWN:
|
||||
splitter = _get_markdown_splitter()
|
||||
# Markdown splitter returns Document objects
|
||||
docs = splitter.split_text(text)
|
||||
chunks = [doc.page_content if hasattr(doc, "page_content") else str(doc) for doc in docs]
|
||||
chunks = [
|
||||
doc.page_content if hasattr(doc, "page_content") else str(doc)
|
||||
for doc in docs
|
||||
]
|
||||
else:
|
||||
# Plain text - use recursive splitter directly
|
||||
splitter = _get_plain_splitter()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue