mirror of
https://github.com/lfnovo/open-notebook.git
synced 2026-04-28 19:40:50 +00:00
feat: add cascade deletion for notebooks with delete preview (#471)
* feat: decrease chunking size for maximum ollama compatibility * docs: improve i18n info on Claude.md * feat: add cascade deletion for notebooks with delete preview - Add Notebook.get_delete_preview() to show counts of affected items - Add Notebook.delete(delete_exclusive_sources) for cascade deletion - Always delete notes when notebook is deleted - Allow user to choose: delete or keep exclusive sources - Shared sources are always unlinked but never deleted - Add NotebookDeleteDialog component with radio button options - Add delete-preview API endpoint - Update delete endpoint with delete_exclusive_sources param - Add i18n support for all 5 locales Closes #77 * docs: remove hardcoded config settings
This commit is contained in:
parent
f14020d385
commit
4e411e0488
19 changed files with 527 additions and 55 deletions
|
|
@ -25,6 +25,8 @@ Two base classes support different persistence patterns: **ObjectModel** (mutabl
|
|||
### notebook.py
|
||||
- **Notebook**: Research project container
|
||||
- `get_sources()`, `get_notes()`, `get_chat_sessions()`: Navigate relationships
|
||||
- `get_delete_preview()`: Returns counts of notes, exclusive sources, and shared sources that would be affected by deletion
|
||||
- `delete(delete_exclusive_sources)`: Cascade deletion - always deletes notes, optionally deletes exclusive sources, always unlinks all sources
|
||||
|
||||
- **Source**: Content item (file/URL)
|
||||
- `vectorize()`: Submit async embedding job (returns command_id, fire-and-forget)
|
||||
|
|
|
|||
|
|
@ -85,6 +85,150 @@ class Notebook(ObjectModel):
|
|||
logger.exception(e)
|
||||
raise DatabaseOperationError(e)
|
||||
|
||||
async def get_delete_preview(self) -> Dict[str, Any]:
    """
    Preview the impact of deleting this notebook.

    Returns:
        Dict with:
        - note_count: number of notes that would be deleted
        - exclusive_source_count: sources linked only to this notebook
          (candidates for deletion)
        - shared_source_count: sources also linked to other notebooks
          (would be unlinked, never deleted)

    Raises:
        DatabaseOperationError: If any database query fails.
    """
    try:
        record_id = ensure_record_id(self.id)

        # Notes are attached via artifact edges pointing at this notebook.
        note_rows = await repo_query(
            "SELECT count() as count FROM artifact WHERE out = $notebook_id GROUP ALL",
            {"notebook_id": record_id},
        )
        total_notes = note_rows[0]["count"] if note_rows else 0

        # For each linked source, count references to OTHER notebooks:
        # assigned_others == 0 -> exclusive to this notebook,
        # assigned_others > 0  -> shared with other notebooks.
        linked_sources = await repo_query(
            """
            SELECT
                id,
                count(->reference[WHERE out != $notebook_id].out) as assigned_others
            FROM (SELECT VALUE <-reference.in AS sources FROM $notebook_id)[0]
            """,
            {"notebook_id": record_id},
        )

        exclusive = sum(
            1 for row in linked_sources if row.get("assigned_others", 0) == 0
        )
        shared = len(linked_sources) - exclusive

        return {
            "note_count": total_notes,
            "exclusive_source_count": exclusive,
            "shared_source_count": shared,
        }
    except Exception as e:
        logger.error(f"Error getting delete preview for notebook {self.id}: {e}")
        logger.exception(e)
        raise DatabaseOperationError(e)
|
||||
|
||||
async def delete(self, delete_exclusive_sources: bool = False) -> Dict[str, int]:
    """
    Delete notebook with cascade deletion of notes and optional source deletion.

    Notes are always deleted. All sources are always unlinked from this
    notebook; sources that belong only to this notebook are additionally
    deleted when requested.

    Args:
        delete_exclusive_sources: If True, also delete sources that belong
            only to this notebook. Default is False.

    Returns:
        Dict with counts: deleted_notes, deleted_sources, unlinked_sources

    Raises:
        InvalidInputError: If the notebook has no ID.
        DatabaseOperationError: If any database operation fails.
    """
    if self.id is None:
        raise InvalidInputError("Cannot delete notebook without an ID")

    try:
        notebook_id = ensure_record_id(self.id)
        deleted_notes = 0
        deleted_sources = 0
        unlinked_sources = 0

        # 1. Get and delete all notes linked to this notebook
        notes = await self.get_notes()
        for note in notes:
            await note.delete()
            deleted_notes += 1
        logger.info(f"Deleted {deleted_notes} notes for notebook {self.id}")

        # Delete artifact relationships (note->notebook edges)
        await repo_query(
            "DELETE artifact WHERE out = $notebook_id",
            {"notebook_id": notebook_id},
        )

        # 2. Handle sources
        if delete_exclusive_sources:
            # Find sources with count of references to OTHER notebooks.
            # assigned_others == 0 means the source is exclusive to this
            # notebook and may be deleted; otherwise it is shared.
            source_counts = await repo_query(
                """
                SELECT
                    id,
                    count(->reference[WHERE out != $notebook_id].out) as assigned_others
                FROM (SELECT VALUE <-reference.in AS sources FROM $notebook_id)[0]
                """,
                {"notebook_id": notebook_id},
            )

            for src in source_counts:
                source_id = src.get("id")
                if source_id and src.get("assigned_others", 0) == 0:
                    # Exclusive source - delete it. A failure on one source
                    # is logged but does not abort the rest of the cascade.
                    try:
                        source = await Source.get(str(source_id))
                        await source.delete()
                        deleted_sources += 1
                    except Exception as e:
                        logger.warning(
                            f"Failed to delete exclusive source {source_id}: {e}"
                        )
                else:
                    unlinked_sources += 1
        else:
            # Just count sources that will be unlinked
            source_result = await repo_query(
                "SELECT count() as count FROM reference WHERE out = $notebook_id GROUP ALL",
                {"notebook_id": notebook_id},
            )
            unlinked_sources = source_result[0]["count"] if source_result else 0

        # Delete reference relationships (unlink all sources)
        await repo_query(
            "DELETE reference WHERE out = $notebook_id",
            {"notebook_id": notebook_id},
        )
        logger.info(
            f"Unlinked {unlinked_sources} sources, deleted {deleted_sources} "
            f"exclusive sources for notebook {self.id}"
        )

        # 3. Delete the notebook record itself
        await super().delete()
        logger.info(f"Deleted notebook {self.id}")

        return {
            "deleted_notes": deleted_notes,
            "deleted_sources": deleted_sources,
            "unlinked_sources": unlinked_sources,
        }

    except Exception as e:
        logger.error(f"Error deleting notebook {self.id}: {e}")
        logger.exception(e)
        # Chain the original exception so the root cause survives wrapping
        # (the original `raise ... ` without `from e` discarded it).
        raise DatabaseOperationError(f"Failed to delete notebook: {e}") from e
|
||||
|
||||
|
||||
class Asset(BaseModel):
|
||||
file_path: Optional[str] = None
|
||||
|
|
|
|||
|
|
@ -39,8 +39,8 @@ Each utility is stateless and can be imported independently.
|
|||
|
||||
### chunking.py
|
||||
- **ContentType**: Enum (HTML, MARKDOWN, PLAIN)
|
||||
- **CHUNK_SIZE**: 1500 characters (constant)
|
||||
- **CHUNK_OVERLAP**: 225 characters (15% overlap)
|
||||
- **CHUNK_SIZE**: constant
|
||||
- **CHUNK_OVERLAP**: constant
|
||||
- **detect_content_type_from_extension(file_path)**: Detect type from file extension
|
||||
- **detect_content_type_from_heuristics(text)**: Detect type from content patterns (returns type + confidence)
|
||||
- **detect_content_type(text, file_path)**: Combined detection (extension primary, heuristics fallback)
|
||||
|
|
|
|||
|
|
@ -22,8 +22,8 @@ from langchain_text_splitters import (
|
|||
from loguru import logger
|
||||
|
||||
# Constants
|
||||
CHUNK_SIZE = 1500 # characters
|
||||
CHUNK_OVERLAP = 225 # 15% of chunk size
|
||||
CHUNK_SIZE = 1200 # characters
|
||||
CHUNK_OVERLAP = 180 # 15% of chunk size
|
||||
HIGH_CONFIDENCE_THRESHOLD = 0.8 # Threshold for heuristics to override extension
|
||||
|
||||
|
||||
|
|
@ -73,7 +73,9 @@ _EXTENSION_TO_CONTENT_TYPE = {
|
|||
}
|
||||
|
||||
|
||||
def detect_content_type_from_extension(file_path: Optional[str]) -> Optional[ContentType]:
|
||||
def detect_content_type_from_extension(
|
||||
file_path: Optional[str],
|
||||
) -> Optional[ContentType]:
|
||||
"""
|
||||
Detect content type from file extension.
|
||||
|
||||
|
|
@ -220,9 +222,7 @@ def _calculate_markdown_score(text: str) -> float:
|
|||
return min(score, 1.0)
|
||||
|
||||
|
||||
def detect_content_type(
|
||||
text: str, file_path: Optional[str] = None
|
||||
) -> ContentType:
|
||||
def detect_content_type(text: str, file_path: Optional[str] = None) -> ContentType:
|
||||
"""
|
||||
Detect content type using file extension (primary) and heuristics (fallback).
|
||||
|
||||
|
|
@ -352,12 +352,18 @@ def chunk_text(
|
|||
splitter = _get_html_splitter()
|
||||
# HTML splitter returns Document objects
|
||||
docs = splitter.split_text(text)
|
||||
chunks = [doc.page_content if hasattr(doc, "page_content") else str(doc) for doc in docs]
|
||||
chunks = [
|
||||
doc.page_content if hasattr(doc, "page_content") else str(doc)
|
||||
for doc in docs
|
||||
]
|
||||
elif content_type == ContentType.MARKDOWN:
|
||||
splitter = _get_markdown_splitter()
|
||||
# Markdown splitter returns Document objects
|
||||
docs = splitter.split_text(text)
|
||||
chunks = [doc.page_content if hasattr(doc, "page_content") else str(doc) for doc in docs]
|
||||
chunks = [
|
||||
doc.page_content if hasattr(doc, "page_content") else str(doc)
|
||||
for doc in docs
|
||||
]
|
||||
else:
|
||||
# Plain text - use recursive splitter directly
|
||||
splitter = _get_plain_splitter()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue