Version 1 (#160)

New front-end
Launch Chat API
Manage Sources
Enable re-embedding of all contents
Sources can be added without a notebook now
Improved settings
Enable model selector on all chats
Background processing for better experience
Dark mode
Improved Notes

Improved Docs: 
- Remove all Streamlit references from documentation
- Update deployment guides with React frontend setup
- Fix Docker environment variables format (SURREAL_URL, SURREAL_PASSWORD)
- Update docker image tag from :latest to :v1-latest
- Change navigation references (Settings → Models to just Models)
- Update development setup to include frontend npm commands
- Add MIGRATION.md guide for users upgrading from Streamlit
- Update quick-start guide with correct environment variables
- Add port 5055 documentation for API access
- Update project structure to reflect frontend/ directory
- Remove outdated source-chat documentation files
This commit is contained in:
Luis Novo 2025-10-18 12:46:22 -03:00 committed by GitHub
parent 124d7d110c
commit b7e656a319
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
319 changed files with 46747 additions and 7408 deletions

View file

@ -1,9 +1,9 @@
import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple
from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple, Union
from loguru import logger
from pydantic import BaseModel, Field, field_validator
from surrealdb import RecordID
from open_notebook.database.repository import ensure_record_id, repo_query
from open_notebook.domain.base import ObjectModel
@ -128,7 +128,7 @@ class SourceInsight(ObjectModel):
logger.exception(e)
raise DatabaseOperationError(e)
async def save_as_note(self, notebook_id: str = None) -> Any:
async def save_as_note(self, notebook_id: Optional[str] = None) -> Any:
source = await self.get_source()
note = Note(
title=f"{self.insight_type} from source {source.title}",
@ -146,6 +146,71 @@ class Source(ObjectModel):
title: Optional[str] = None
topics: Optional[List[str]] = Field(default_factory=list)
full_text: Optional[str] = None
command: Optional[Union[str, RecordID]] = Field(
default=None, description="Link to surreal-commands processing job"
)
# Pydantic model configuration. `arbitrary_types_allowed` lets the model
# declare fields whose types pydantic has no built-in schema for —
# presumably required by the RecordID-typed `command` field; TODO confirm.
class Config:
    arbitrary_types_allowed = True
@field_validator("command", mode="before")
@classmethod
def parse_command(cls, value):
    """Normalize the ``command`` field before validation.

    A non-empty string is handed to ``ensure_record_id`` and the result
    is returned. Every other input (``None``, an empty string, or a
    non-string object) is returned unchanged.
    """
    # Anything that is not a non-empty string passes straight through.
    if not isinstance(value, str) or not value:
        return value
    return ensure_record_id(value)
@field_validator("id", mode="before")
@classmethod
def parse_id(cls, value):
    """Normalize the ``id`` field to a string (or ``None``) before validation.

    ``RecordID`` instances are always converted with ``str``. Any other
    truthy value is converted with ``str`` as well; ``None`` and other
    falsy values yield ``None``.
    """
    # RecordID is handled first so it is stringified regardless of truthiness.
    if isinstance(value, RecordID):
        return str(value)
    # None and every other falsy input collapse to None.
    if not value:
        return None
    return str(value)
async def get_status(self) -> Optional[str]:
    """Return the status of the command linked to this object.

    Returns:
        ``None`` when ``self.command`` is unset or falsy. Otherwise the
        ``status`` attribute of the object returned by
        ``get_command_status``, or ``"unknown"`` when that lookup returns
        a falsy value or raises.
    """
    if self.command:
        try:
            # The import sits inside the try, so a failure to import is
            # handled by the except clause below as well.
            from surreal_commands import get_command_status

            job = await get_command_status(str(self.command))
            if job:
                return job.status
            return "unknown"
        except Exception as e:
            logger.warning(f"Failed to get command status for {self.command}: {e}")
            return "unknown"
    return None
async def get_processing_progress(self) -> Optional[Dict[str, Any]]:
    """Return a summary dict describing the command linked to this object.

    Returns:
        ``None`` when ``self.command`` is unset or falsy, when
        ``get_command_status`` returns a falsy value, or when anything in
        the lookup raises (a warning is logged in that case). Otherwise a
        dict with the keys ``status``, ``started_at``, ``completed_at``,
        ``error`` and ``result``.
    """
    if not self.command:
        return None

    try:
        from surreal_commands import get_command_status

        job = await get_command_status(str(self.command))
        if not job:
            return None

        payload = getattr(job, "result", None)
        # Timing metadata is only read when the result payload is a dict;
        # otherwise both timestamps come back as None.
        if isinstance(payload, dict):
            metadata = payload.get("execution_metadata", {})
        else:
            metadata = {}

        return {
            "status": job.status,
            "started_at": metadata.get("started_at"),
            "completed_at": metadata.get("completed_at"),
            "error": getattr(job, "error_message", None),
            "result": payload,
        }
    except Exception as e:
        logger.warning(f"Failed to get command progress for {self.command}: {e}")
        return None
async def get_context(
self, context_size: Literal["short", "long"] = "short"
@ -202,6 +267,17 @@ class Source(ObjectModel):
EMBEDDING_MODEL = await model_manager.get_embedding_model()
try:
# DELETE EXISTING EMBEDDINGS FIRST - Makes vectorize() idempotent
delete_result = await repo_query(
"DELETE source_embedding WHERE source = $source_id",
{"source_id": ensure_record_id(self.id)}
)
deleted_count = len(delete_result) if delete_result else 0
if deleted_count > 0:
logger.info(f"Deleted {deleted_count} existing embeddings for source {self.id}")
else:
logger.debug(f"No existing embeddings found for source {self.id}")
if not self.full_text:
logger.warning(f"No text to vectorize for source {self.id}")
return
@ -224,6 +300,8 @@ class Source(ObjectModel):
) -> Tuple[int, List[float], str]:
logger.debug(f"Processing chunk {idx}/{chunk_count}")
try:
if EMBEDDING_MODEL is None:
raise ValueError("EMBEDDING_MODEL is not configured")
embedding = (await EMBEDDING_MODEL.aembed([chunk]))[0]
cleaned_content = chunk
logger.debug(f"Successfully processed chunk {idx}")
@ -294,6 +372,16 @@ class Source(ObjectModel):
logger.error(f"Error adding insight to source {self.id}: {str(e)}")
raise # DatabaseOperationError(e)
def _prepare_save_data(self) -> dict:
    """Build the save payload, passing ``command`` through ``ensure_record_id``.

    Starts from the parent class's ``_prepare_save_data`` result. When
    that dict holds a non-``None`` ``command`` entry, the entry is
    replaced by ``ensure_record_id`` applied to it. The same dict object
    is returned.
    """
    payload = super()._prepare_save_data()
    command = payload.get("command")
    # A missing or None command is left exactly as the parent produced it.
    if command is not None:
        payload["command"] = ensure_record_id(command)
    return payload
class Note(ObjectModel):
table_name: ClassVar[str] = "note"
@ -335,12 +423,18 @@ class Note(ObjectModel):
# Chat session model backed by the "chat_session" table.
# Both relation helpers below use the same "refers_to" relation name and
# differ only in the target id and the validation message.
class ChatSession(ObjectModel):
    table_name: ClassVar[str] = "chat_session"
    title: Optional[str] = None
    # Optional string; how it is consumed is not visible in this class.
    model_override: Optional[str] = None

    async def relate_to_notebook(self, notebook_id: str) -> Any:
        """Call ``self.relate`` with "refers_to" and the given notebook id.

        Raises:
            InvalidInputError: if ``notebook_id`` is empty or otherwise falsy.
        """
        if notebook_id:
            return await self.relate("refers_to", notebook_id)
        raise InvalidInputError("Notebook ID must be provided")

    async def relate_to_source(self, source_id: str) -> Any:
        """Call ``self.relate`` with "refers_to" and the given source id.

        Raises:
            InvalidInputError: if ``source_id`` is empty or otherwise falsy.
        """
        if source_id:
            return await self.relate("refers_to", source_id)
        raise InvalidInputError("Source ID must be provided")
async def text_search(
keyword: str, results: int, source: bool = True, note: bool = True
@ -348,14 +442,14 @@ async def text_search(
if not keyword:
raise InvalidInputError("Search keyword cannot be empty")
try:
results = await repo_query(
search_results = await repo_query(
"""
select *
from fn::text_search($keyword, $results, $source, $note)
""",
{"keyword": keyword, "results": results, "source": source, "note": note},
)
return results
return search_results
except Exception as e:
logger.error(f"Error performing text search: {str(e)}")
logger.exception(e)
@ -373,8 +467,10 @@ async def vector_search(
raise InvalidInputError("Search keyword cannot be empty")
try:
EMBEDDING_MODEL = await model_manager.get_embedding_model()
if EMBEDDING_MODEL is None:
raise ValueError("EMBEDDING_MODEL is not configured")
embed = (await EMBEDDING_MODEL.aembed([keyword]))[0]
results = await repo_query(
search_results = await repo_query(
"""
SELECT * FROM fn::vector_search($embed, $results, $source, $note, $minimum_score);
""",
@ -386,7 +482,7 @@ async def vector_search(
"minimum_score": minimum_score,
},
)
return results
return search_results
except Exception as e:
logger.error(f"Error performing vector search: {str(e)}")
logger.exception(e)