"""Application configuration loaded from environment variables.

Loads the project-level ``.env`` file and exposes the values, together with
the model, chunker and reranker instances built from them, as attributes of
:class:`Config`. Everything here runs at import time.
"""

import os
from pathlib import Path

from chonkie import AutoEmbeddings, CodeChunker, RecursiveChunker
from dotenv import load_dotenv
from langchain_community.chat_models import ChatLiteLLM
from rerankers import Reranker

# Get the base directory of the project (three ``parent`` hops up from this
# file's resolved path).
BASE_DIR = Path(__file__).resolve().parent.parent.parent

env_file = BASE_DIR / ".env"
load_dotenv(env_file)


class Config:
    """Settings container populated from environment variables.

    All attributes are evaluated once, while the class body executes.

    Raises:
        ValueError: While the class body executes, if the embedding model
            reports a ``dimension`` greater than 2000.
    """

    # Database
    DATABASE_URL = os.getenv("DATABASE_URL")

    # Google OAuth
    GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
    GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")
    NEXT_FRONTEND_URL = os.getenv("NEXT_FRONTEND_URL")

    # LONG-CONTEXT LLMS
    LONG_CONTEXT_LLM = os.getenv("LONG_CONTEXT_LLM")
    long_context_llm_instance = ChatLiteLLM(model=LONG_CONTEXT_LLM)

    # GPT Researcher
    FAST_LLM = os.getenv("FAST_LLM")
    STRATEGIC_LLM = os.getenv("STRATEGIC_LLM")
    fast_llm_instance = ChatLiteLLM(model=FAST_LLM)
    strategic_llm_instance = ChatLiteLLM(model=STRATEGIC_LLM)

    # Chonkie Configuration | Edit this to your needs
    EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
    embedding_model_instance = AutoEmbeddings.get_embeddings(EMBEDDING_MODEL)
    # Chunk size follows the embedding model's ``max_seq_length`` when the
    # instance exposes one, otherwise falls back to 512.
    chunker_instance = RecursiveChunker(
        chunk_size=getattr(embedding_model_instance, "max_seq_length", 512)
    )
    code_chunker_instance = CodeChunker(
        chunk_size=getattr(embedding_model_instance, "max_seq_length", 512)
    )

    # Reranker's Configuration | Pinecone, Cohere etc.
    # Read more at https://github.com/AnswerDotAI/rerankers?tab=readme-ov-file#usage
    RERANKERS_MODEL_NAME = os.getenv("RERANKERS_MODEL_NAME")
    RERANKERS_MODEL_TYPE = os.getenv("RERANKERS_MODEL_TYPE")
    reranker_instance = Reranker(
        model_name=RERANKERS_MODEL_NAME,
        model_type=RERANKERS_MODEL_TYPE,
    )

    # OAuth JWT
    SECRET_KEY = os.getenv("SECRET_KEY")

    # Unstructured API Key
    UNSTRUCTURED_API_KEY = os.getenv("UNSTRUCTURED_API_KEY")

    # Firecrawl API Key (None when the variable is unset)
    FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")

    # Validation Checks
    # Check embedding dimension
    if (
        hasattr(embedding_model_instance, "dimension")
        and embedding_model_instance.dimension > 2000
    ):
        raise ValueError(
            f"Embedding dimension for Model: {EMBEDDING_MODEL} "
            f"has {embedding_model_instance.dimension} dimensions, which "
            f"exceeds the maximum of 2000 allowed by PGVector."
        )

    @classmethod
    def get_settings(cls) -> dict:
        """Get all settings as a dictionary.

        Returns:
            A dict mapping each attribute defined directly on the class to
            its value. Names starting with an underscore, callable values,
            and ``classmethod``/``staticmethod`` descriptors are omitted.
        """
        return {
            key: value
            for key, value in cls.__dict__.items()
            if not key.startswith("_")
            and not callable(value)
            # A classmethod object taken from ``__dict__`` is not callable,
            # so ``callable`` alone would let ``get_settings`` itself leak
            # into the result.
            and not isinstance(value, (classmethod, staticmethod))
        }


# Create a config instance
config = Config()