mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-01 18:19:08 +00:00
fix: Support for All Embeddings
This commit is contained in:
parent
1419d5d714
commit
42bde28781
3 changed files with 63 additions and 5 deletions
|
@ -0,0 +1,58 @@
|
|||
"""Remove char limit on title columns
|
||||
|
||||
Revision ID: 5
|
||||
Revises: 4
|
||||
Create Date: 2023-06-10 00:00:00.000000
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '5'
|
||||
down_revision: Union[str, None] = '4'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Drop the 200-character cap on the ``title`` columns.

    For each of the ``chats``, ``documents`` and ``podcasts`` tables the
    ``title`` column is altered from ``String(200)`` to a length-less
    ``String``. The column is declared to Alembic as already non-nullable,
    so nullability is not changed here.
    """
    # Same order as the original hand-written statements: Chat, Document,
    # Podcast.
    for table_name in ('chats', 'documents', 'podcasts'):
        op.alter_column(
            table_name,
            'title',
            existing_type=sa.String(200),
            type_=sa.String(),
            existing_nullable=False,
        )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Restore the 200-character cap on the ``title`` columns.

    For each of the ``chats``, ``documents`` and ``podcasts`` tables the
    ``title`` column is altered from a length-less ``String`` back to
    ``String(200)``. The column is declared to Alembic as already
    non-nullable, so nullability is not changed here.

    NOTE(review): if any row holds a title longer than 200 characters, the
    database will presumably reject this type change -- confirm against the
    target backend before running a downgrade on populated data.
    """
    # Same order as the original hand-written statements: Chat, Document,
    # Podcast.
    for table_name in ('chats', 'documents', 'podcasts'):
        op.alter_column(
            table_name,
            'title',
            existing_type=sa.String(),
            type_=sa.String(200),
            existing_nullable=False,
        )
|
|
@ -38,10 +38,10 @@ class Config:
|
|||
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
|
||||
embedding_model_instance = AutoEmbeddings.get_embeddings(EMBEDDING_MODEL)
|
||||
chunker_instance = RecursiveChunker(
|
||||
chunk_size=embedding_model_instance.max_seq_length,
|
||||
chunk_size=getattr(embedding_model_instance, 'max_seq_length', 512)
|
||||
)
|
||||
code_chunker_instance = CodeChunker(
|
||||
chunk_size=embedding_model_instance.max_seq_length
|
||||
chunk_size=getattr(embedding_model_instance, 'max_seq_length', 512)
|
||||
)
|
||||
|
||||
# Reranker's Configuration | Pinecone, Cohere etc. Read more at https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file#usage
|
||||
|
|
|
@ -76,7 +76,7 @@ class Chat(BaseModel, TimestampMixin):
|
|||
__tablename__ = "chats"
|
||||
|
||||
type = Column(SQLAlchemyEnum(ChatType), nullable=False)
|
||||
title = Column(String(200), nullable=False, index=True)
|
||||
title = Column(String, nullable=False, index=True)
|
||||
initial_connectors = Column(ARRAY(String), nullable=True)
|
||||
messages = Column(JSON, nullable=False)
|
||||
|
||||
|
@ -86,7 +86,7 @@ class Chat(BaseModel, TimestampMixin):
|
|||
class Document(BaseModel, TimestampMixin):
|
||||
__tablename__ = "documents"
|
||||
|
||||
title = Column(String(200), nullable=False, index=True)
|
||||
title = Column(String, nullable=False, index=True)
|
||||
document_type = Column(SQLAlchemyEnum(DocumentType), nullable=False)
|
||||
document_metadata = Column(JSON, nullable=True)
|
||||
|
||||
|
@ -109,7 +109,7 @@ class Chunk(BaseModel, TimestampMixin):
|
|||
class Podcast(BaseModel, TimestampMixin):
|
||||
__tablename__ = "podcasts"
|
||||
|
||||
title = Column(String(200), nullable=False, index=True)
|
||||
title = Column(String, nullable=False, index=True)
|
||||
is_generated = Column(Boolean, nullable=False, default=False)
|
||||
podcast_content = Column(Text, nullable=False, default="")
|
||||
file_location = Column(String(500), nullable=False, default="")
|
||||
|
|
Loading…
Add table
Reference in a new issue