import streamlit as st

from open_notebook.database.migrate import MigrationManager
from stream_app.utils import version_sidebar

# Landing page: render the version sidebar, then either offer to migrate the
# database or forward the user straight to the Notebooks page.
version_sidebar()
mm = MigrationManager()
if mm.needs_migration:
    st.warning("The Open Notebook database needs a migration to run properly.")
    if st.button("Run Migration"):
        try:
            mm.run_migration_up()
        except Exception as e:
            # UI boundary: surface the failure to the user instead of
            # letting the page claim success.
            st.error(f"Migration failed: {e}")
        else:
            # run_migration_up() may log a failure without raising, so
            # confirm the database really reached the latest version before
            # reporting success.
            if mm.needs_migration:
                st.error(
                    "Migration did not complete. Check the application logs for details."
                )
            else:
                st.success("Migration successful")
                # Re-run the script so the up-to-date branch below is taken.
                st.rerun()
else:
    st.switch_page("pages/2_📒_Notebooks.py")
IF $sources {( - SELECT id as item_id, math::max(search::score(1)) AS relevance - FROM source - WHERE title @1@ $query_text - GROUP BY item_id)} - ELSE { [] }; - - let $source_embedding_search = - IF $sources {( - SELECT source as item_id, math::max(search::score(1)) AS relevance - FROM source_embedding - WHERE content @1@ $query_text - GROUP BY item_id)} - ELSE { [] }; - - let $source_full_search = - IF $sources {( - SELECT source as item_id, math::max(search::score(1)) AS relevance - FROM source - WHERE full_text @1@ $query_text - GROUP BY item_id)} - ELSE { [] }; - - let $source_insight_search = - IF $sources {( - SELECT source as item_id, math::max(search::score(1)) AS relevance - FROM source_insight - WHERE content @1@ $query_text - GROUP BY item_id)} - ELSE { [] }; - - let $note_title_search = - IF $show_notes {( - SELECT id as item_id, math::max(search::score(1)) AS relevance - FROM note - WHERE title @1@ $query_text - GROUP BY item_id)} - ELSE { [] }; - - let $note_content_search = - IF $show_notes {( - SELECT id as item_id, math::max(search::score(1)) AS relevance - FROM note - WHERE content @1@ $query_text - GROUP BY item_id)} - ELSE { [] }; - - let $source_chunk_results = array::union($source_embedding_search, $source_full_search); - - let $source_asset_results = array::union($source_title_search, $source_insight_search); - - let $source_results = array::union($source_chunk_results, $source_asset_results ); - let $note_results = array::union($note_title_search, $note_content_search ); - let $final_results = array::union($source_results, $note_results ); - - RETURN (SELECT item_id, math::max(relevance) as relevance from $final_results - group by item_id ORDER BY relevance DESC LIMIT $match_count); - - -}; - -DEFINE EVENT IF NOT EXISTS source_delete ON TABLE source WHEN ($after == NONE) THEN { - delete source_embedding where source == $before.id; - delete source_insight where source == $before.id; -}; - -DEFINE TABLE IF NOT EXISTS podcast_config 
SCHEMALESS; - -UPDATE open_notebook:database_info SET - version= "0.0.2"; diff --git a/database/db_setup.surrealql b/migrations/1.surrealql similarity index 55% rename from database/db_setup.surrealql rename to migrations/1.surrealql index 32e9492..8ca52ca 100644 --- a/database/db_setup.surrealql +++ b/migrations/1.surrealql @@ -1,92 +1,78 @@ -REMOVE table IF EXISTS source; -REMOVE table IF EXISTS reference; -REMOVE table IF EXISTS notebook; -REMOVE table IF EXISTS note; -REMOVE table IF EXISTS artifact; -REMOVE table IF EXISTS source_chunk; -REMOVE table IF EXISTS source_insight; -REMOVE ANALYZER IF EXISTS my_analyzer; -REMOVE FUNCTION IF EXISTS fn::text_search; - -REMOVE INDEX IF EXISTS idx_source_full ON TABLE source_chunk; -REMOVE INDEX IF EXISTS idx_source_embed_chunk ON TABLE source_embedding; -REMOVE INDEX IF EXISTS idx_source_insight ON TABLE source_insight; -REMOVE INDEX IF EXISTS idx_note ON TABLE note; -REMOVE INDEX IF EXISTS idx_source_title ON TABLE source; -REMOVE INDEX IF EXISTS idx_note_title ON TABLE note; DEFINE TABLE IF NOT EXISTS source SCHEMAFULL; -DEFINE FIELD asset +DEFINE FIELD IF NOT EXISTS + asset ON TABLE source FLEXIBLE TYPE option; -DEFINE FIELD title ON TABLE source TYPE option; -DEFINE FIELD full_text ON TABLE source TYPE option; -DEFINE FIELD topics ON TABLE source TYPE option>; +DEFINE FIELD IF NOT EXISTS title ON TABLE source TYPE option; +DEFINE FIELD IF NOT EXISTS topics ON TABLE source TYPE option>; +DEFINE FIELD IF NOT EXISTS full_text ON TABLE source TYPE option; -DEFINE FIELD created ON source DEFAULT time::now() VALUE $before OR time::now(); -DEFINE FIELD updated ON source DEFAULT time::now() VALUE time::now(); +DEFINE FIELD IF NOT EXISTS created ON source DEFAULT time::now() VALUE $before OR time::now(); +DEFINE FIELD IF NOT EXISTS updated ON source DEFAULT time::now() VALUE time::now(); DEFINE TABLE IF NOT EXISTS source_embedding SCHEMAFULL; -DEFINE FIELD source ON TABLE source_embedding TYPE record; -DEFINE FIELD order ON 
TABLE source_embedding TYPE int; -DEFINE FIELD content ON TABLE source_embedding TYPE string; -DEFINE FIELD embedding ON TABLE source_embedding TYPE array; +DEFINE FIELD IF NOT EXISTS source ON TABLE source_embedding TYPE record; +DEFINE FIELD IF NOT EXISTS order ON TABLE source_embedding TYPE int; +DEFINE FIELD IF NOT EXISTS content ON TABLE source_embedding TYPE string; +DEFINE FIELD IF NOT EXISTS embedding ON TABLE source_embedding TYPE array; DEFINE TABLE IF NOT EXISTS source_insight SCHEMAFULL; -DEFINE FIELD source ON TABLE source_insight TYPE record; -DEFINE FIELD insight_type ON TABLE source_insight TYPE string; -DEFINE FIELD content ON TABLE source_insight TYPE string; -DEFINE FIELD embedding ON TABLE source_insight TYPE array; +DEFINE FIELD IF NOT EXISTS source ON TABLE source_insight TYPE record; +DEFINE FIELD IF NOT EXISTS insight_type ON TABLE source_insight TYPE string; +DEFINE FIELD IF NOT EXISTS content ON TABLE source_insight TYPE string; +DEFINE FIELD IF NOT EXISTS embedding ON TABLE source_insight TYPE array; -DEFINE EVENT source_delete ON TABLE source WHEN ($after == NONE) THEN { +DEFINE EVENT IF NOT EXISTS source_delete ON TABLE source WHEN ($after == NONE) THEN { delete source_embedding where source == $before.id; delete source_insight where source == $before.id; }; DEFINE TABLE IF NOT EXISTS note SCHEMAFULL; -DEFINE FIELD title ON TABLE note TYPE option; -DEFINE FIELD summary ON TABLE note TYPE option; -DEFINE FIELD content ON TABLE note TYPE option; -DEFINE FIELD embedding ON TABLE note TYPE array; +DEFINE FIELD IF NOT EXISTS title ON TABLE note TYPE option; +DEFINE FIELD IF NOT EXISTS summary ON TABLE note TYPE option; +DEFINE FIELD IF NOT EXISTS content ON TABLE note TYPE option; +DEFINE FIELD IF NOT EXISTS embedding ON TABLE note TYPE array; -DEFINE FIELD created ON note DEFAULT time::now() VALUE $before OR time::now(); -DEFINE FIELD updated ON note DEFAULT time::now() VALUE time::now(); +DEFINE FIELD IF NOT EXISTS created ON note DEFAULT 
time::now() VALUE $before OR time::now(); +DEFINE FIELD IF NOT EXISTS updated ON note DEFAULT time::now() VALUE time::now(); DEFINE TABLE IF NOT EXISTS notebook SCHEMAFULL; -DEFINE FIELD name ON TABLE notebook TYPE option; -DEFINE FIELD description ON TABLE notebook TYPE option; -DEFINE FIELD archived ON TABLE notebook TYPE option DEFAULT False; +DEFINE FIELD IF NOT EXISTS name ON TABLE notebook TYPE option; +DEFINE FIELD IF NOT EXISTS description ON TABLE notebook TYPE option; +DEFINE FIELD IF NOT EXISTS archived ON TABLE notebook TYPE option DEFAULT False; -DEFINE FIELD created ON notebook DEFAULT time::now() VALUE $before OR time::now(); -DEFINE FIELD updated ON notebook DEFAULT time::now() VALUE time::now(); +DEFINE FIELD IF NOT EXISTS created ON notebook DEFAULT time::now() VALUE $before OR time::now(); +DEFINE FIELD IF NOT EXISTS updated ON notebook DEFAULT time::now() VALUE time::now(); -DEFINE TABLE reference +DEFINE TABLE IF NOT EXISTS reference TYPE RELATION FROM source TO notebook; -DEFINE TABLE artifact +DEFINE TABLE IF NOT EXISTS artifact TYPE RELATION FROM note TO notebook; --- entender o analyzer -DEFINE ANALYZER my_analyzer TOKENIZERS blank,class,camel,punct FILTERS snowball(english), lowercase; +DEFINE TABLE IF NOT EXISTS podcast_config SCHEMALESS; -DEFINE INDEX idx_source_title ON TABLE source COLUMNS title SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; -DEFINE INDEX idx_source_full_text ON TABLE source COLUMNS full_text SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; -DEFINE INDEX idx_source_embed_chunk ON TABLE source_embedding COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; -DEFINE INDEX idx_source_insight ON TABLE source_insight COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; -DEFINE INDEX idx_note ON TABLE note COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; -DEFINE INDEX idx_note_title ON TABLE note COLUMNS title SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; +-- entender o analyzer +DEFINE ANALYZER IF NOT 
EXISTS my_analyzer TOKENIZERS blank,class,camel,punct FILTERS snowball(english), lowercase; + +DEFINE INDEX IF NOT EXISTS idx_source_title ON TABLE source COLUMNS title SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; +DEFINE INDEX IF NOT EXISTS idx_source_full_text ON TABLE source COLUMNS full_text SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; +DEFINE INDEX IF NOT EXISTS idx_source_embed_chunk ON TABLE source_embedding COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; +DEFINE INDEX IF NOT EXISTS idx_source_insight ON TABLE source_insight COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; +DEFINE INDEX IF NOT EXISTS idx_note ON TABLE note COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; +DEFINE INDEX IF NOT EXISTS idx_note_title ON TABLE note COLUMNS title SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS; DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) { - - + let $source_title_search = IF $sources {( SELECT id as item_id, math::max(search::score(1)) AS relevance @@ -150,8 +136,6 @@ DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: }; -REMOVE FUNCTION fn::vector_search; - DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array, $match_count: int, $sources:bool, $show_notes:bool) { let $source_embedding_search = @@ -188,10 +172,7 @@ DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array, $match_cou }; -CREATE open_notebook:database_info SET - version= "0.0.2"; - -UPDATE open_notebook:database_info SET - version= "0.0.2"; - - +IF array::len(select * from open_notebook:default_models) == 0 THEN + CREATE open_notebook:default_models SET + default_chat_model= "" +END; diff --git a/migrations/1_down.surrealql b/migrations/1_down.surrealql new file mode 100644 index 0000000..e53791a --- /dev/null +++ b/migrations/1_down.surrealql @@ -0,0 +1,24 @@ +REMOVE TABLE IF EXISTS source; +REMOVE TABLE IF EXISTS source_embedding; +REMOVE TABLE IF 
class MigrationManager:
    """Check for and apply the SurrealDB schema migrations shipped with the app.

    The connection settings are read from the ``SURREAL_ADDRESS``,
    ``SURREAL_PORT``, ``SURREAL_USER``, ``SURREAL_PASS``,
    ``SURREAL_NAMESPACE`` and ``SURREAL_DATABASE`` environment variables.
    """

    def __init__(self) -> None:
        """Open the connection and load the migration scripts.

        Raises:
            KeyError: if one of the ``SURREAL_*`` environment variables is unset.
            ValueError: if ``SURREAL_PORT`` is not an integer.
        """
        self.connection = SurrealSyncConnection(
            host=os.environ["SURREAL_ADDRESS"],
            port=int(os.environ["SURREAL_PORT"]),
            user=os.environ["SURREAL_USER"],
            password=os.environ["SURREAL_PASS"],
            namespace=os.environ["SURREAL_NAMESPACE"],
            database=os.environ["SURREAL_DATABASE"],
            encrypted=False,  # Set to True if using SSL
        )
        # NOTE(review): these paths are relative, so they presumably resolve
        # against the process working directory - confirm the app is always
        # started from the repository root.
        self.up_migrations = [Migration.from_file("migrations/1.surrealql")]
        self.down_migrations = [Migration.from_file("migrations/1_down.surrealql")]
        self.runner = MigrationRunner(
            up_migrations=self.up_migrations,
            down_migrations=self.down_migrations,
            connection=self.connection,
        )

    def get_current_version(self) -> int:
        """Return the version that ``get_latest_version`` reports for this database."""
        return get_latest_version(
            self.connection.host,
            self.connection.port,
            self.connection.user,
            self.connection.password,
            self.connection.namespace,
            self.connection.database,
        )

    @property
    def needs_migration(self) -> bool:
        """Whether the database version is below the number of up-migrations.

        Presumes the stored version counts applied up-migrations - TODO confirm
        against the sblpy migration runner.
        """
        return self.get_current_version() < len(self.up_migrations)

    def run_migration_up(self) -> None:
        """Run the migration runner if the database is behind.

        Does nothing (beyond logging) when the database is already at the
        latest version.

        Raises:
            Exception: whatever the runner raises. The error is logged and
                re-raised so callers can tell a failed migration from a
                successful one instead of receiving a silent return.
        """
        current_version = self.get_current_version()
        logger.debug(f"Current version before migration: {current_version}")

        # Reuse the version fetched above rather than querying it again
        # through the ``needs_migration`` property.
        if current_version >= len(self.up_migrations):
            logger.debug("Database is already at the latest version")
            return

        try:
            self.runner.run()
        except Exception as e:
            logger.error(f"Migration failed: {str(e)}")
            raise

        new_version = self.get_current_version()
        logger.debug(f"Migration successful. New version: {new_version}")
Expected {EXPECTED_VERSION}, got {version}" - ) - except Exception as e: - logger.error(e) - raise e - - def repo_create(table: str, data: Dict[str, Any]): query = f"CREATE {table} CONTENT {data};" # vars = {"table": table, "data": data} @@ -89,10 +66,3 @@ def repo_relate(source: str, relationship: str, target: str): result = repo_query(query) logger.debug(f"RELATE query result: {result}") return result - - -def execute_migration(script: str): - with open(f"database/{script}", "r") as file: - content = file.read() - - return repo_query(content) diff --git a/open_notebook/domain/base.py b/open_notebook/domain/base.py index 4fca352..0da80b5 100644 --- a/open_notebook/domain/base.py +++ b/open_notebook/domain/base.py @@ -4,18 +4,18 @@ from typing import Any, ClassVar, Dict, List, Optional, Type, TypeVar from loguru import logger from pydantic import BaseModel, ValidationError, field_validator -from open_notebook.exceptions import ( - DatabaseOperationError, - InvalidInputError, - NotFoundError, -) -from open_notebook.repository import ( +from open_notebook.database.repository import ( repo_create, repo_delete, repo_query, repo_relate, repo_update, ) +from open_notebook.exceptions import ( + DatabaseOperationError, + InvalidInputError, + NotFoundError, +) T = TypeVar("T", bound="ObjectModel") diff --git a/open_notebook/domain/models.py b/open_notebook/domain/models.py index 12074ff..752fba6 100644 --- a/open_notebook/domain/models.py +++ b/open_notebook/domain/models.py @@ -3,11 +3,11 @@ from typing import ClassVar, Optional from loguru import logger from pydantic import BaseModel -from open_notebook.domain.base import ObjectModel -from open_notebook.repository import ( +from open_notebook.database.repository import ( repo_query, repo_update, ) +from open_notebook.domain.base import ObjectModel class Model(ObjectModel): diff --git a/open_notebook/domain/notebook.py b/open_notebook/domain/notebook.py index b917548..e52ad8a 100644 --- a/open_notebook/domain/notebook.py 
+++ b/open_notebook/domain/notebook.py @@ -6,6 +6,10 @@ from loguru import logger from pydantic import BaseModel, Field, field_validator from open_notebook.config import EMBEDDING_MODEL +from open_notebook.database.repository import ( + repo_create, + repo_query, +) from open_notebook.domain.base import ObjectModel from open_notebook.exceptions import ( DatabaseOperationError, @@ -13,10 +17,6 @@ from open_notebook.exceptions import ( ) from open_notebook.graphs.multipattern import graph as pattern_graph from open_notebook.graphs.recursive_toc import graph as toc_graph -from open_notebook.repository import ( - repo_create, - repo_query, -) from open_notebook.utils import split_text, surreal_clean diff --git a/open_notebook/exceptions.py b/open_notebook/exceptions.py index 501e67a..49be004 100644 --- a/open_notebook/exceptions.py +++ b/open_notebook/exceptions.py @@ -16,12 +16,6 @@ class UnsupportedTypeException(OpenNotebookError): pass -class NoSchemaFound(OpenNotebookError): - """Raised when a database schema is not found.""" - - pass - - class InvalidInputError(OpenNotebookError): """Raised when invalid input is provided.""" @@ -70,12 +64,6 @@ class NetworkError(OpenNotebookError): pass -class InvalidDatabaseSchema(OpenNotebookError): - """Raised when the database is not under the expected schema.""" - - pass - - class NoTranscriptFound(OpenNotebookError): """Raised when no transcript is found for a video."""