fix: Support for All Embeddings

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-04-30 00:10:50 -07:00
parent 1419d5d714
commit 42bde28781
3 changed files with 63 additions and 5 deletions

View file

@ -0,0 +1,58 @@
"""Remove char limit on title columns
Revision ID: 5
Revises: 4
Create Date: 2023-06-10 00:00:00.000000
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# Revision identifiers read by Alembic to place this script in the
# migration graph.
revision: str = "5"  # identifier of this migration
down_revision: Union[str, None] = "4"  # migration this one is applied on top of
branch_labels: Union[str, Sequence[str], None] = None  # no branch labels
depends_on: Union[str, Sequence[str], None] = None  # no extra dependencies
def upgrade() -> None:
    """Drop the 200-character limit on the ``title`` columns.

    The ``title`` column of ``chats``, ``documents`` and ``podcasts`` is
    changed from ``String(200)`` to an unbounded ``String``. The columns
    are declared as currently NOT NULL and nullability is not altered.
    """
    # NOTE(review): the models declare these columns with index=True; very
    # long titles may run into the database's index entry size limit --
    # confirm this is acceptable.
    # Processed in the order Chat, Document, Podcast.
    for table_name in ('chats', 'documents', 'podcasts'):
        op.alter_column(
            table_name,
            'title',
            existing_type=sa.String(200),
            type_=sa.String(),
            existing_nullable=False,
        )
def downgrade() -> None:
    """Restore the 200-character limit on the ``title`` columns.

    The ``title`` column of ``chats``, ``documents`` and ``podcasts`` is
    changed from an unbounded ``String`` back to ``String(200)``. The
    columns are declared as currently NOT NULL and nullability is not
    altered.

    This step is lossy: titles written while the limit was lifted may be
    longer than 200 characters, and on PostgreSQL they are truncated to
    200 characters so that the downgrade can complete.
    """
    # Processed in the order Chat, Document, Podcast.
    for table_name in ('chats', 'documents', 'podcasts'):
        op.alter_column(
            table_name,
            'title',
            existing_type=sa.String(),
            type_=sa.String(200),
            existing_nullable=False,
            # Without a USING clause PostgreSQL rejects the type change as
            # soon as one stored value exceeds 200 characters ("value too
            # long for type character varying(200)"). An explicit cast
            # truncates over-length values instead of raising.
            postgresql_using='title::varchar(200)',
        )

View file

@ -38,10 +38,10 @@ class Config:
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
embedding_model_instance = AutoEmbeddings.get_embeddings(EMBEDDING_MODEL)
chunker_instance = RecursiveChunker(
chunk_size=embedding_model_instance.max_seq_length,
chunk_size=getattr(embedding_model_instance, 'max_seq_length', 512)
)
code_chunker_instance = CodeChunker(
chunk_size=embedding_model_instance.max_seq_length
chunk_size=getattr(embedding_model_instance, 'max_seq_length', 512)
)
# Reranker's Configuration | Pinecone, Cohere, etc. Read more at https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file#usage

View file

@ -76,7 +76,7 @@ class Chat(BaseModel, TimestampMixin):
__tablename__ = "chats"
type = Column(SQLAlchemyEnum(ChatType), nullable=False)
title = Column(String(200), nullable=False, index=True)
title = Column(String, nullable=False, index=True)
initial_connectors = Column(ARRAY(String), nullable=True)
messages = Column(JSON, nullable=False)
@ -86,7 +86,7 @@ class Chat(BaseModel, TimestampMixin):
class Document(BaseModel, TimestampMixin):
__tablename__ = "documents"
title = Column(String(200), nullable=False, index=True)
title = Column(String, nullable=False, index=True)
document_type = Column(SQLAlchemyEnum(DocumentType), nullable=False)
document_metadata = Column(JSON, nullable=True)
@ -109,7 +109,7 @@ class Chunk(BaseModel, TimestampMixin):
class Podcast(BaseModel, TimestampMixin):
__tablename__ = "podcasts"
title = Column(String(200), nullable=False, index=True)
title = Column(String, nullable=False, index=True)
is_generated = Column(Boolean, nullable=False, default=False)
podcast_content = Column(Text, nullable=False, default="")
file_location = Column(String(500), nullable=False, default="")