diff --git a/surfsense_backend/alembic/versions/5_remove_title_char_limit.py b/surfsense_backend/alembic/versions/5_remove_title_char_limit.py
new file mode 100644
index 0000000..57ed108
--- /dev/null
+++ b/surfsense_backend/alembic/versions/5_remove_title_char_limit.py
@@ -0,0 +1,53 @@
+"""Remove char limit on title columns
+
+Revision ID: 5
+Revises: 4
+Create Date: 2023-06-10 00:00:00.000000
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '5'
+down_revision: Union[str, None] = '4'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+# Tables whose ``title`` column is altered by this revision.
+TITLE_TABLES = ('chats', 'documents', 'podcasts')
+
+# Length limit that applied to ``title`` before this revision.
+OLD_TITLE_LENGTH = 200
+
+
+def upgrade() -> None:
+    """Drop the 200-character limit on every ``title`` column."""
+    for table_name in TITLE_TABLES:
+        op.alter_column(table_name, 'title',
+                        existing_type=sa.String(OLD_TITLE_LENGTH),
+                        type_=sa.String(),
+                        existing_nullable=False)
+
+
+def downgrade() -> None:
+    """Restore the 200-character limit on every ``title`` column.
+
+    Titles saved while the limit was lifted may exceed 200 characters, and
+    PostgreSQL rejects narrowing a column that holds over-long values.
+    Such titles are truncated first, so the downgrade is lossy for them.
+    """
+    for table_name in TITLE_TABLES:
+        # Table names come from the constant above, never from user input.
+        op.execute(
+            f"UPDATE {table_name} "
+            f"SET title = SUBSTR(title, 1, {OLD_TITLE_LENGTH}) "
+            f"WHERE LENGTH(title) > {OLD_TITLE_LENGTH}"
+        )
+        op.alter_column(table_name, 'title',
+                        existing_type=sa.String(),
+                        type_=sa.String(OLD_TITLE_LENGTH),
+                        existing_nullable=False)
diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py
index 4adf2b7..91968aa 100644
--- a/surfsense_backend/app/config/__init__.py
+++ b/surfsense_backend/app/config/__init__.py
@@ -38,10 +38,10 @@ class Config:
     EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
     embedding_model_instance = AutoEmbeddings.get_embeddings(EMBEDDING_MODEL)
     chunker_instance = RecursiveChunker(
-        chunk_size=embedding_model_instance.max_seq_length,
+        chunk_size=getattr(embedding_model_instance, 'max_seq_length', 512)
     )
     code_chunker_instance = CodeChunker(
-        chunk_size=embedding_model_instance.max_seq_length
+        chunk_size=getattr(embedding_model_instance, 'max_seq_length', 512)
     )
 
     # Reranker's Configuration | Pinecode, Cohere etc. Read more at https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file#usage
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index 320f059..b4ee3e7 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -76,7 +76,7 @@ class Chat(BaseModel, TimestampMixin):
     __tablename__ = "chats"
 
     type = Column(SQLAlchemyEnum(ChatType), nullable=False)
-    title = Column(String(200), nullable=False, index=True)
+    title = Column(String, nullable=False, index=True)
     initial_connectors = Column(ARRAY(String), nullable=True)
     messages = Column(JSON, nullable=False)
 
@@ -86,7 +86,7 @@ class Chat(BaseModel, TimestampMixin):
 class Document(BaseModel, TimestampMixin):
     __tablename__ = "documents"
 
-    title = Column(String(200), nullable=False, index=True)
+    title = Column(String, nullable=False, index=True)
     document_type = Column(SQLAlchemyEnum(DocumentType), nullable=False)
     document_metadata = Column(JSON, nullable=True)
 
@@ -109,7 +109,7 @@ class Chunk(BaseModel, TimestampMixin):
 class Podcast(BaseModel, TimestampMixin):
     __tablename__ = "podcasts"
 
-    title = Column(String(200), nullable=False, index=True)
+    title = Column(String, nullable=False, index=True)
     is_generated = Column(Boolean, nullable=False, default=False)
     podcast_content = Column(Text, nullable=False, default="")
     file_location = Column(String(500), nullable=False, default="")