diff --git a/open_notebook/domain/notebook.py b/open_notebook/domain/notebook.py index d23ee04..4321231 100644 --- a/open_notebook/domain/notebook.py +++ b/open_notebook/domain/notebook.py @@ -1,7 +1,6 @@ import os from typing import Any, ClassVar, Dict, List, Literal, Optional -from langchain_core.runnables.config import RunnableConfig from loguru import logger from pydantic import BaseModel, Field, field_validator @@ -15,8 +14,8 @@ from open_notebook.exceptions import ( DatabaseOperationError, InvalidInputError, ) -from open_notebook.graphs.multipattern import graph as pattern_graph -from open_notebook.graphs.recursive_toc import graph as toc_graph + +# from temp.recursive_toc import graph as toc_graph from open_notebook.utils import split_text, surreal_clean @@ -211,29 +210,6 @@ class Source(ObjectModel): logger.error(f"Error adding insight to source {self.id}: {str(e)}") raise DatabaseOperationError(e) - # todo: move this to content processing pipeline as a major graph - def generate_toc_and_title(self) -> "Source": - DEFAULT_MODELS, EMBEDDING_MODEL, SPEECH_TO_TEXT_MODEL = load_default_models() - - try: - config = RunnableConfig(configurable=dict(thread_id=self.id)) - result = toc_graph.invoke({"content": self.full_text}, config=config) - self.add_insight("Table of Contents", surreal_clean(result["toc"])) - if not self.title: - transformations = [ - "Based on the Table of Contents below, please provide a Title for this content, with max 15 words" - ] - output = pattern_graph.invoke( - dict(content_stack=[result["toc"]], transformations=transformations) - ) - self.title = surreal_clean(output["output"]) - self.save() - return self - except Exception as e: - logger.error(f"Error summarizing source {self.id}: {str(e)}") - logger.exception(e) - raise DatabaseOperationError(e) - class Note(ObjectModel): table_name: ClassVar[str] = "note" diff --git a/prompts/patterns/default/toc.jinja b/prompts/patterns/default/toc.jinja new file mode 100644 index 0000000..c78f159 
--- /dev/null
+++ b/prompts/patterns/default/toc.jinja
@@ -0,0 +1,15 @@
+
+# SYSTEM ROLE
+You are a content analysis assistant that reads through documents and provides a Table of Contents (ToC) to help users identify what the document covers more easily.
+Your ToC should capture all major topics and transitions in the content and should mention them in the order they appear.
+
+# TASK
+Analyze the provided content and create a Table of Contents:
+- Captures the core topics included in the text
+- Gives a small description of what is covered
+
+# INPUT
+
+{{content}}
+
+# OUTPUT
\ No newline at end of file
diff --git a/stream_app/source.py b/stream_app/source.py
index 63c25f8..64fd54f 100644
--- a/stream_app/source.py
+++ b/stream_app/source.py
@@ -24,6 +24,38 @@ def run_patterns(input_text, patterns):
     return output["output"]
 
 
+# moved it here to replace it with the pipeline on 0.1.0
+def generate_toc_and_title(source) -> "Source":
+    """Generate a Table of Contents insight for ``source`` and title it if untitled.
+
+    Runs the ``patterns/default/toc`` pattern over ``source.full_text`` and
+    stores the cleaned result as a "Table of Contents" insight. When the
+    source has no title, a second pattern run asks for a title based on that
+    ToC, and the source is saved.
+
+    Returns the same ``source`` object. Any exception is logged and re-raised.
+    """
+    DEFAULT_MODELS, EMBEDDING_MODEL, SPEECH_TO_TEXT_MODEL = load_default_models()
+
+    try:
+        patterns = ["patterns/default/toc"]
+        # run_patterns already returns output["output"], so use its result directly
+        toc = run_patterns(source.full_text, patterns=patterns)
+        source.add_insight("Table of Contents", surreal_clean(toc))
+        if not source.title:
+            transformations = [
+                "Based on the Table of Contents below, please provide a Title for this content, with max 15 words"
+            ]
+            title = run_patterns(toc, patterns=transformations)
+            source.title = surreal_clean(title)
+            source.save()
+        return source
+    except Exception as e:
+        logger.error(f"Error summarizing source {source.id}: {str(e)}")
+        logger.exception(e)
+        raise
+
+
 @st.dialog("Source", width="large")
 def source_panel(source_id):
     source: Source = Source.get(source_id)
@@ -151,7 +183,7 @@ def add_source(session_id):
             source.save()
             source.add_to_notebook(st.session_state[session_id]["notebook"].id)
             st.write("Summarizing...")
-            source.generate_toc_and_title()
+            generate_toc_and_title(source)
         except UnsupportedTypeException as e:
st.warning( "This type of content is not supported yet. If you think it should be, let us know on the project Issues's page" diff --git a/transformations.yaml b/transformations.yaml index b0d3a95..435fb3c 100644 --- a/transformations.yaml +++ b/transformations.yaml @@ -16,6 +16,11 @@ source_insights: description: "Create a dense representation of the content" patterns: - patterns/default/makeitdense + - name: "Table of Contents" + insight_type: "Table of Contents" + description: "Analyzes the content and returns a ToC" + patterns: + - patterns/default/analyze_paper - name: "Analyze Paper" insight_type: "Paper Analysis" description: "Analyze the paper and provide a quick summary"