mirror of
https://github.com/lfnovo/open-notebook.git
synced 2026-04-30 04:20:02 +00:00
Version 1 (#160)
- New front-end
- Launch Chat API
- Manage Sources
- Enable re-embedding of all contents
- Sources can be added without a notebook now
- Improved settings
- Enable model selector on all chats
- Background processing for better experience
- Dark mode
- Improved Notes
- Improved Docs:
  - Remove all Streamlit references from documentation
  - Update deployment guides with React frontend setup
  - Fix Docker environment variables format (SURREAL_URL, SURREAL_PASSWORD)
  - Update docker image tag from :latest to :v1-latest
  - Change navigation references (Settings → Models to just Models)
  - Update development setup to include frontend npm commands
  - Add MIGRATION.md guide for users upgrading from Streamlit
  - Update quick-start guide with correct environment variables
  - Add port 5055 documentation for API access
  - Update project structure to reflect frontend/ directory
  - Remove outdated source-chat documentation files
This commit is contained in:
parent 124d7d110c
commit b7e656a319
319 changed files with 46747 additions and 7408 deletions
@@ -17,17 +17,14 @@ from open_notebook.utils import clean_thinking_content
 class SubGraphState(TypedDict):
     question: str
     term: str
-    # type: Literal["text", "vector"]
     instructions: str
     results: dict
     answer: str
+    ids: list  # Added for provide_answer function


 class Search(BaseModel):
     term: str
-    # type: Literal["text", "vector"] = Field(
-    #     description="The type of search. Use 'text' for keyword search and 'vector' for semantic search. If you are using text, search always for a single word"
-    # )
     instructions: str = Field(
         description="Tell the answering LLM what information you need extracted from this search"
     )
@@ -50,8 +47,8 @@ class ThreadState(TypedDict):

 async def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict:
     parser = PydanticOutputParser(pydantic_object=Strategy)
-    system_prompt = Prompter(prompt_template="ask/entry", parser=parser).render(
-        data=state
+    system_prompt = Prompter(prompt_template="ask/entry", parser=parser).render(  # type: ignore[arg-type]
+        data=state  # type: ignore[arg-type]
     )
     model = await provision_langchain_model(
         system_prompt,
@@ -65,7 +62,8 @@ async def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict:
     ai_message = await model.ainvoke(system_prompt)

     # Clean the thinking content from the response
-    cleaned_content = clean_thinking_content(ai_message.content)
+    message_content = ai_message.content if isinstance(ai_message.content, str) else str(ai_message.content)
+    cleaned_content = clean_thinking_content(message_content)

     # Parse the cleaned JSON content
     strategy = parser.parse(cleaned_content)
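This coercion hunk exists because LangChain types `BaseMessage.content` as `str | list[str | dict]`, while `clean_thinking_content` expects a plain string. A minimal sketch of the pattern; the helper body here is an assumption, the real one lives in `open_notebook.utils`:

```python
import re
from typing import Any


def clean_thinking_content_sketch(text: str) -> str:
    # Assumption: reasoning models wrap chain-of-thought in <think>...</think> tags.
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()


def normalize_content(content: Any) -> str:
    # Mirrors the diff: pass str through, stringify list-of-blocks payloads.
    return content if isinstance(content, str) else str(content)


print(clean_thinking_content_sketch(normalize_content("<think>hmm</think>42")))  # "42"
```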
@@ -99,7 +97,7 @@ async def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict:
     payload["results"] = results
     ids = [r["id"] for r in results]
     payload["ids"] = ids
-    system_prompt = Prompter(prompt_template="ask/query_process").render(data=payload)
+    system_prompt = Prompter(prompt_template="ask/query_process").render(data=payload)  # type: ignore[arg-type]
     model = await provision_langchain_model(
         system_prompt,
         config.get("configurable", {}).get("answer_model"),
@@ -107,11 +105,12 @@ async def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict:
         max_tokens=2000,
     )
     ai_message = await model.ainvoke(system_prompt)
-    return {"answers": [clean_thinking_content(ai_message.content)]}
+    ai_content = ai_message.content if isinstance(ai_message.content, str) else str(ai_message.content)
+    return {"answers": [clean_thinking_content(ai_content)]}


 async def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict:
-    system_prompt = Prompter(prompt_template="ask/final_answer").render(data=state)
+    system_prompt = Prompter(prompt_template="ask/final_answer").render(data=state)  # type: ignore[arg-type]
     model = await provision_langchain_model(
         system_prompt,
         config.get("configurable", {}).get("final_answer_model"),
@@ -119,7 +118,8 @@ async def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict:
         max_tokens=2000,
     )
     ai_message = await model.ainvoke(system_prompt)
-    return {"final_answer": clean_thinking_content(ai_message.content)}
+    final_content = ai_message.content if isinstance(ai_message.content, str) else str(ai_message.content)
+    return {"final_answer": clean_thinking_content(final_content)}


 agent_state = StateGraph(ThreadState)
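These functions render the parser's format instructions into the prompt and then parse the model's cleaned output back into a Pydantic object. A self-contained sketch of that round trip; `StrategySketch` is a hypothetical stand-in, since the real `Strategy` model is not shown in this diff:

```python
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field


class StrategySketch(BaseModel):  # hypothetical stand-in for the repo's Strategy
    reasoning: str = Field(description="Why these searches were chosen")
    searches: list[str] = Field(description="Search terms to run")


parser = PydanticOutputParser(pydantic_object=StrategySketch)

# Embedded into the system prompt so the LLM emits matching JSON.
print(parser.get_format_instructions())

# After clean_thinking_content() strips <think> blocks, parse the JSON back.
strategy = parser.parse('{"reasoning": "broad first", "searches": ["langgraph"]}')
print(strategy.searches)  # ['langgraph']
```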
@@ -20,19 +20,54 @@ class ThreadState(TypedDict):
     notebook: Optional[Notebook]
     context: Optional[str]
     context_config: Optional[dict]
+    model_override: Optional[str]


 def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict:
-    system_prompt = Prompter(prompt_template="chat").render(data=state)
+    system_prompt = Prompter(prompt_template="chat").render(data=state)  # type: ignore[arg-type]
     payload = [SystemMessage(content=system_prompt)] + state.get("messages", [])
-    model = asyncio.run(
-        provision_langchain_model(
-            str(payload),
-            config.get("configurable", {}).get("model_id"),
-            "chat",
-            max_tokens=10000,
-        )
-    )
+    model_id = (
+        config.get("configurable", {}).get("model_id")
+        or state.get("model_override")
+    )
+
+    # Handle async model provisioning from sync context
+    def run_in_new_loop():
+        """Run the async function in a new event loop"""
+        new_loop = asyncio.new_event_loop()
+        try:
+            asyncio.set_event_loop(new_loop)
+            return new_loop.run_until_complete(
+                provision_langchain_model(
+                    str(payload),
+                    model_id,
+                    "chat",
+                    max_tokens=10000,
+                )
+            )
+        finally:
+            new_loop.close()
+            asyncio.set_event_loop(None)
+
+    try:
+        # Try to get the current event loop
+        asyncio.get_running_loop()
+        # If we're in an event loop, run in a thread with a new loop
+        import concurrent.futures
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            future = executor.submit(run_in_new_loop)
+            model = future.result()
+    except RuntimeError:
+        # No event loop running, safe to use asyncio.run()
+        model = asyncio.run(
+            provision_langchain_model(
+                str(payload),
+                model_id,
+                "chat",
+                max_tokens=10000,
+            )
+        )

     ai_message = model.invoke(payload)
     return {"messages": ai_message}
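The try/except RuntimeError block above is the standard workaround for calling a coroutine from synchronous code that may or may not already be inside an event loop: if `asyncio.get_running_loop()` succeeds, block in a worker thread that owns a fresh loop; otherwise fall back to `asyncio.run()`. A standalone sketch of the same pattern; all names here are illustrative:

```python
import asyncio
import concurrent.futures
from typing import Awaitable, Callable, TypeVar

T = TypeVar("T")


def run_blocking(coro_factory: Callable[[], Awaitable[T]]) -> T:
    """Run a coroutine to completion from sync code, loop or no loop."""

    def _in_fresh_loop() -> T:
        new_loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(new_loop)
            return new_loop.run_until_complete(coro_factory())
        finally:
            new_loop.close()
            asyncio.set_event_loop(None)

    try:
        asyncio.get_running_loop()  # raises RuntimeError if no loop is running
    except RuntimeError:
        return asyncio.run(coro_factory())  # no loop: safe to own one
    # A loop is running: block in a worker thread that owns a fresh loop.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(_in_fresh_loop).result()


async def _demo() -> str:
    await asyncio.sleep(0)
    return "provisioned"


print(run_blocking(_demo))  # works whether or not a loop is already running
```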
@@ -4,7 +4,6 @@ from ai_prompter import Prompter
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.runnables import RunnableConfig
 from langgraph.graph import END, START, StateGraph
-from loguru import logger
 from typing_extensions import TypedDict

 from open_notebook.graphs.utils import provision_langchain_model
@@ -36,7 +35,7 @@ async def call_model(state: dict, config: RunnableConfig) -> dict:


 agent_state = StateGraph(PatternChainState)
-agent_state.add_node("agent", call_model)
+agent_state.add_node("agent", call_model)  # type: ignore[type-var]
 agent_state.add_edge(START, "agent")
 agent_state.add_edge("agent", END)
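The `# type: ignore[type-var]` is needed because the node function's state parameter (a plain `dict` here) does not exactly match the graph's declared state type. For reference, a minimal runnable version of this add_node/add_edge/compile wiring; `PatternState` is a stand-in, since `PatternChainState` is not shown in this hunk:

```python
import asyncio

from langgraph.graph import END, START, StateGraph
from typing_extensions import TypedDict


class PatternState(TypedDict):  # illustrative stand-in for PatternChainState
    output: str


async def call_model(state: PatternState) -> dict:
    # A real node would render the prompt and invoke the provisioned model.
    return {"output": "hello"}


builder = StateGraph(PatternState)
builder.add_node("agent", call_model)
builder.add_edge(START, "agent")
builder.add_edge("agent", END)
graph = builder.compile()

print(asyncio.run(graph.ainvoke({"output": ""})))  # {'output': 'hello'}
```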
@@ -18,7 +18,8 @@ from open_notebook.graphs.transformation import graph as transform_graph
 class SourceState(TypedDict):
     content_state: ProcessSourceState
     apply_transformations: List[Transformation]
-    notebook_id: str
+    source_id: str
+    notebook_ids: List[str]
     source: Source
     transformation: Annotated[list, operator.add]
     embed: bool
@@ -30,8 +31,14 @@ class TransformationState(TypedDict):


 async def content_process(state: SourceState) -> dict:
-    content_settings = ContentSettings()
-    content_state: Dict[str, Any] = state["content_state"]
+    content_settings = ContentSettings(
+        default_content_processing_engine_doc="auto",
+        default_content_processing_engine_url="auto",
+        default_embedding_option="ask",
+        auto_delete_files="yes",
+        youtube_preferred_languages=["en", "pt", "es", "de", "nl", "en-GB", "fr", "hi", "ja"]
+    )
+    content_state: Dict[str, Any] = state["content_state"]  # type: ignore[assignment]

     content_state["url_engine"] = (
         content_settings.default_content_processing_engine_url or "auto"
@@ -48,16 +55,23 @@
 async def save_source(state: SourceState) -> dict:
     content_state = state["content_state"]

-    source = Source(
-        asset=Asset(url=content_state.url, file_path=content_state.file_path),
-        full_text=content_state.content,
-        title=content_state.title,
-    )
+    # Get existing source using the provided source_id
+    source = await Source.get(state["source_id"])
+    if not source:
+        raise ValueError(f"Source with ID {state['source_id']} not found")
+
+    # Update the source with processed content
+    source.asset = Asset(url=content_state.url, file_path=content_state.file_path)
+    source.full_text = content_state.content
+
+    # Preserve existing title if none provided in processed content
+    if content_state.title:
+        source.title = content_state.title
+
     await source.save()

-    if state["notebook_id"]:
-        logger.debug(f"Adding source to notebook {state['notebook_id']}")
-        await source.add_to_notebook(state["notebook_id"])
+    # NOTE: Notebook associations are created by the API immediately for UI responsiveness
+    # No need to create them here to avoid duplicate edges

     if state["embed"]:
         logger.debug("Embedding content for vector search")
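The rewritten `save_source` assumes a two-phase flow: the API pre-creates the `Source` record (so the UI can show it immediately) and this background step only enriches it. A hedged sketch of that update step, using only the calls visible in the hunk; the surrounding job-queue plumbing is an assumption, not the repo's actual code:

```python
from open_notebook.domain.notebook import Source


async def enrich_source(source_id: str, title: str | None, full_text: str) -> None:
    # The record was pre-created by the API; the worker only fills it in.
    source = await Source.get(source_id)
    if not source:
        raise ValueError(f"Source with ID {source_id} not found")
    source.full_text = full_text
    if title:  # keep the user's existing title when processing found none
        source.title = title
    await source.save()
```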
@@ -94,7 +108,7 @@ async def transform_content(state: TransformationState) -> Optional[dict]:

     logger.debug(f"Applying transformation {transformation.name}")
     result = await transform_graph.ainvoke(
-        dict(input_text=content, transformation=transformation)
+        dict(input_text=content, transformation=transformation)  # type: ignore[arg-type]
     )
     await source.add_insight(transformation.title, result["output"])
     return {
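The transformation subgraph is invoked with a plain dict payload, which is what the new `# type: ignore[arg-type]` silences. A sketch of calling it directly; the `Transformation` import path is an assumption:

```python
from open_notebook.domain.transformation import Transformation  # assumed module path
from open_notebook.graphs.transformation import graph as transform_graph


async def apply_transformation(text: str, transformation: Transformation) -> str:
    # Same call shape as transform_content above; "output" holds the cleaned result.
    result = await transform_graph.ainvoke(
        dict(input_text=text, transformation=transformation)
    )
    return result["output"]
```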
open_notebook/graphs/source_chat.py (new file, 214 lines)
@@ -0,0 +1,214 @@
import asyncio
import sqlite3
from typing import Annotated, Dict, List, Optional

from ai_prompter import Prompter
from langchain_core.messages import SystemMessage
from langchain_core.runnables import RunnableConfig
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import END, START, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import TypedDict

from open_notebook.config import LANGGRAPH_CHECKPOINT_FILE
from open_notebook.domain.notebook import Source, SourceInsight
from open_notebook.graphs.utils import provision_langchain_model
from open_notebook.utils.context_builder import ContextBuilder


class SourceChatState(TypedDict):
    messages: Annotated[list, add_messages]
    source_id: str
    source: Optional[Source]
    insights: Optional[List[SourceInsight]]
    context: Optional[str]
    model_override: Optional[str]
    context_indicators: Optional[Dict[str, List[str]]]


def call_model_with_source_context(state: SourceChatState, config: RunnableConfig) -> dict:
    """
    Main function that builds source context and calls the model.

    This function:
    1. Uses ContextBuilder to build source-specific context
    2. Applies the source_chat Jinja2 prompt template
    3. Handles model provisioning with override support
    4. Tracks context indicators for referenced insights/content
    """
    source_id = state.get("source_id")
    if not source_id:
        raise ValueError("source_id is required in state")

    # Build source context using ContextBuilder (run async code in new loop)
    def build_context():
        """Build context in a new event loop"""
        new_loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(new_loop)
            context_builder = ContextBuilder(
                source_id=source_id,
                include_insights=True,
                include_notes=False,  # Focus on source-specific content
                max_tokens=50000  # Reasonable limit for source context
            )
            return new_loop.run_until_complete(context_builder.build())
        finally:
            new_loop.close()
            asyncio.set_event_loop(None)

    # Get the built context
    try:
        # Try to get the current event loop
        asyncio.get_running_loop()
        # If we're in an event loop, run in a thread with a new loop
        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future = executor.submit(build_context)
            context_data = future.result()
    except RuntimeError:
        # No event loop running, safe to create a new one
        context_data = build_context()

    # Extract source and insights from context
    source = None
    insights = []
    context_indicators: dict[str, list[str | None]] = {"sources": [], "insights": [], "notes": []}

    if context_data.get("sources"):
        source_info = context_data["sources"][0]  # First source
        source = Source(**source_info) if isinstance(source_info, dict) else source_info
        context_indicators["sources"].append(source.id)

    if context_data.get("insights"):
        for insight_data in context_data["insights"]:
            insight = SourceInsight(**insight_data) if isinstance(insight_data, dict) else insight_data
            insights.append(insight)
            context_indicators["insights"].append(insight.id)

    # Format context for the prompt
    formatted_context = _format_source_context(context_data)

    # Build prompt data for the template
    prompt_data = {
        "source": source.model_dump() if source else None,
        "insights": [insight.model_dump() for insight in insights] if insights else [],
        "context": formatted_context,
        "context_indicators": context_indicators
    }

    # Apply the source_chat prompt template
    system_prompt = Prompter(prompt_template="source_chat").render(data=prompt_data)
    payload = [SystemMessage(content=system_prompt)] + state.get("messages", [])

    # Handle async model provisioning from sync context
    def run_in_new_loop():
        """Run the async function in a new event loop"""
        new_loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(new_loop)
            return new_loop.run_until_complete(
                provision_langchain_model(
                    str(payload),
                    config.get("configurable", {}).get("model_id") or state.get("model_override"),
                    "chat",
                    max_tokens=10000,
                )
            )
        finally:
            new_loop.close()
            asyncio.set_event_loop(None)

    try:
        # Try to get the current event loop
        asyncio.get_running_loop()
        # If we're in an event loop, run in a thread with a new loop
        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future = executor.submit(run_in_new_loop)
            model = future.result()
    except RuntimeError:
        # No event loop running, safe to use asyncio.run()
        model = asyncio.run(
            provision_langchain_model(
                str(payload),
                config.get("configurable", {}).get("model_id") or state.get("model_override"),
                "chat",
                max_tokens=10000,
            )
        )

    ai_message = model.invoke(payload)

    # Update state with context information
    return {
        "messages": ai_message,
        "source": source,
        "insights": insights,
        "context": formatted_context,
        "context_indicators": context_indicators
    }


def _format_source_context(context_data: Dict) -> str:
    """
    Format the context data into a readable string for the prompt.

    Args:
        context_data: Context data from ContextBuilder

    Returns:
        Formatted context string
    """
    context_parts = []

    # Add source information
    if context_data.get("sources"):
        context_parts.append("## SOURCE CONTENT")
        for source in context_data["sources"]:
            if isinstance(source, dict):
                context_parts.append(f"**Source ID:** {source.get('id', 'Unknown')}")
                context_parts.append(f"**Title:** {source.get('title', 'No title')}")
                if source.get("full_text"):
                    # Truncate full text if too long
                    full_text = source["full_text"]
                    if len(full_text) > 5000:
                        full_text = full_text[:5000] + "...\n[Content truncated]"
                    context_parts.append(f"**Content:**\n{full_text}")
            context_parts.append("")  # Empty line for separation

    # Add insights
    if context_data.get("insights"):
        context_parts.append("## SOURCE INSIGHTS")
        for insight in context_data["insights"]:
            if isinstance(insight, dict):
                context_parts.append(f"**Insight ID:** {insight.get('id', 'Unknown')}")
                context_parts.append(f"**Type:** {insight.get('insight_type', 'Unknown')}")
                context_parts.append(f"**Content:** {insight.get('content', 'No content')}")
            context_parts.append("")  # Empty line for separation

    # Add metadata
    if context_data.get("metadata"):
        metadata = context_data["metadata"]
        context_parts.append("## CONTEXT METADATA")
        context_parts.append(f"- Source count: {metadata.get('source_count', 0)}")
        context_parts.append(f"- Insight count: {metadata.get('insight_count', 0)}")
        context_parts.append(f"- Total tokens: {context_data.get('total_tokens', 0)}")
        context_parts.append("")

    return "\n".join(context_parts)


# Create SQLite checkpointer
conn = sqlite3.connect(
    LANGGRAPH_CHECKPOINT_FILE,
    check_same_thread=False,
)
memory = SqliteSaver(conn)

# Create the StateGraph
source_chat_state = StateGraph(SourceChatState)
source_chat_state.add_node("source_chat_agent", call_model_with_source_context)
source_chat_state.add_edge(START, "source_chat_agent")
source_chat_state.add_edge("source_chat_agent", END)
source_chat_graph = source_chat_state.compile(checkpointer=memory)
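Because the graph compiles with a `SqliteSaver` checkpointer, conversation state persists per `thread_id`: reusing the same id resumes the same conversation. A hedged usage sketch; the id values are illustrative:

```python
from langchain_core.messages import HumanMessage

from open_notebook.graphs.source_chat import source_chat_graph

result = source_chat_graph.invoke(
    {
        "messages": [HumanMessage(content="What are the key claims in this source?")],
        "source_id": "source:abc123",  # hypothetical record id
    },
    # SqliteSaver keys saved state by thread_id, so this id picks the conversation.
    config={"configurable": {"thread_id": "source-chat-demo"}},
)
print(result["messages"][-1].content)
```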
@@ -18,14 +18,15 @@ class TransformationState(TypedDict):


 async def run_transformation(state: dict, config: RunnableConfig) -> dict:
-    source: Source = state.get("source")
+    source_obj = state.get("source")
+    source: Source = source_obj if isinstance(source_obj, Source) else None  # type: ignore[assignment]
     content = state.get("input_text")
     assert source or content, "No content to transform"
     transformation: Transformation = state["transformation"]
     if not content:
         content = source.full_text
     transformation_template_text = transformation.prompt
-    default_prompts: DefaultPrompts = DefaultPrompts()
+    default_prompts: DefaultPrompts = DefaultPrompts(transformation_instructions=None)
     if default_prompts.transformation_instructions:
         transformation_template_text = f"{default_prompts.transformation_instructions}\n\n{transformation_template_text}"

@@ -34,7 +35,8 @@ async def run_transformation(state: dict, config: RunnableConfig) -> dict:
     system_prompt = Prompter(template_text=transformation_template_text).render(
         data=state
     )
-    payload = [SystemMessage(content=system_prompt)] + [HumanMessage(content=content)]
+    content_str = str(content) if content else ""
+    payload = [SystemMessage(content=system_prompt), HumanMessage(content=content_str)]
     chain = await provision_langchain_model(
         str(payload),
         config.get("configurable", {}).get("model_id"),
@@ -45,7 +47,8 @@ async def run_transformation(state: dict, config: RunnableConfig) -> dict:
     response = await chain.ainvoke(payload)

     # Clean thinking content from the response
-    cleaned_content = clean_thinking_content(response.content)
+    response_content = response.content if isinstance(response.content, str) else str(response.content)
+    cleaned_content = clean_thinking_content(response_content)

     if source:
         await source.add_insight(transformation.title, cleaned_content)
@@ -56,7 +59,7 @@ async def run_transformation(state: dict, config: RunnableConfig) -> dict:


 agent_state = StateGraph(TransformationState)
-agent_state.add_node("agent", run_transformation)
+agent_state.add_node("agent", run_transformation)  # type: ignore[type-var]
 agent_state.add_edge(START, "agent")
 agent_state.add_edge("agent", END)
 graph = agent_state.compile()
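`run_transformation` composes its prompt by prepending the global `transformation_instructions` (when set) to the per-transformation template, then rendering with `ai_prompter`. A minimal sketch of that composition; the instruction and prompt strings are illustrative, and I assume `render(data=...)` returns the text unchanged when the template has no variables:

```python
from ai_prompter import Prompter

# Illustrative values; DefaultPrompts normally supplies transformation_instructions.
transformation_instructions = "Always answer in the language of the source."
transformation_prompt = "Summarize the following text in three bullet points."

template_text = transformation_prompt
if transformation_instructions:
    # Global instructions are prepended ahead of the per-transformation prompt.
    template_text = f"{transformation_instructions}\n\n{template_text}"

system_prompt = Prompter(template_text=template_text).render(data={})
print(system_prompt)
```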