From ebcfd97a0e9aeec3bf11290d412006b4500c49b2 Mon Sep 17 00:00:00 2001
From: "DESKTOP-RTLN3BA\\$punk"
Date: Thu, 28 Aug 2025 22:58:55 -0700
Subject: [PATCH] fix: added basic context window check for summarization

- Need to optimize. Better to have a separate class for long-form
  summarization using chunking.

---
 .../app/utils/document_converters.py         | 99 ++++++++++++++++++-
 1 file changed, 98 insertions(+), 1 deletion(-)

diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py
index 8cc05af..69ba27c 100644
--- a/surfsense_backend/app/utils/document_converters.py
+++ b/surfsense_backend/app/utils/document_converters.py
@@ -1,10 +1,99 @@
 import hashlib
 
+from litellm import get_model_info, token_counter
+
 from app.config import config
 from app.db import Chunk
 from app.prompts import SUMMARY_PROMPT_TEMPLATE
 
 
+def get_model_context_window(model_name: str) -> int:
+    """Get the maximum input token count (context window) for a model."""
+    try:
+        model_info = get_model_info(model_name)
+        context_window = model_info.get("max_input_tokens", 4096)  # Default fallback
+        return context_window
+    except Exception as e:
+        print(
+            f"Warning: Could not get model info for {model_name}, using default 4096 tokens. Error: {e}"
+        )
+        return 4096  # Conservative fallback
+
+
+def optimize_content_for_context_window(
+    content: str, document_metadata: dict | None, model_name: str
+) -> str:
+    """
+    Optimize content length to fit within the model's context window using binary search.
+
+    Args:
+        content: Original document content
+        document_metadata: Optional metadata dictionary
+        model_name: Model name for token counting
+
+    Returns:
+        Optimized content that fits within the context window
+    """
+    if not content:
+        return content
+
+    # Get model context window
+    context_window = get_model_context_window(model_name)
+
+    # Reserve tokens for: system prompt, metadata, template overhead, and output.
+    # Conservative estimate: 2000 tokens for prompt + metadata + output buffer.
+    # TODO: Calculate the summary system prompt token count here instead.
+    reserved_tokens = 2000
+
+    # Add metadata token cost if present
+    if document_metadata:
+        metadata_text = (
+            f"<DOCUMENT_METADATA>\n\n{document_metadata}\n\n</DOCUMENT_METADATA>"
+        )
+        metadata_tokens = token_counter(
+            messages=[{"role": "user", "content": metadata_text}], model=model_name
+        )
+        reserved_tokens += metadata_tokens
+
+    available_tokens = context_window - reserved_tokens
+
+    if available_tokens <= 100:  # Minimum viable content
+        print(f"Warning: Very limited tokens available for content: {available_tokens}")
+        return content[:500]  # Fallback to first 500 chars
+
+    # Binary search for the longest prefix that fits the available token budget
+    left, right = 0, len(content)
+    optimal_length = 0
+
+    while left <= right:
+        mid = (left + right) // 2
+        test_content = content[:mid]
+
+        # Test token count for this content length
+        test_document = f"<DOCUMENT>\n\n{test_content}\n\n</DOCUMENT>"
+        test_tokens = token_counter(
+            messages=[{"role": "user", "content": test_document}], model=model_name
+        )
+
+        if test_tokens <= available_tokens:
+            optimal_length = mid
+            left = mid + 1
+        else:
+            right = mid - 1
+
+    optimized_content = (
+        content[:optimal_length] if optimal_length > 0 else content[:500]
+    )
+
+    if optimal_length < len(content):
+        print(
+            f"Content optimized: {len(content)} -> {optimal_length} chars "
+            f"to fit in {available_tokens} available tokens"
+        )
+
+    return optimized_content
+
+
 async def generate_document_summary(
     content: str,
     user_llm,
@@ -21,8 +110,16 @@ async def generate_document_summary(
     Returns:
         Tuple of (enhanced_summary_content, summary_embedding)
     """
+    # Get model name from user_llm for token counting
+    model_name = getattr(user_llm, "model", "gpt-3.5-turbo")  # Fallback to default
+
+    # Optimize content to fit within the context window
+    optimized_content = optimize_content_for_context_window(
+        content, document_metadata, model_name
+    )
+
     summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm
-    content_with_metadata = f"<DOCUMENT_METADATA>\n\n{document_metadata}\n\n</DOCUMENT_METADATA>\n\n<DOCUMENT>\n\n{content}\n\n</DOCUMENT>"
+    content_with_metadata = f"<DOCUMENT_METADATA>\n\n{document_metadata}\n\n</DOCUMENT_METADATA>\n\n<DOCUMENT>\n\n{optimized_content}\n\n</DOCUMENT>"
 
     summary_result = await summary_chain.ainvoke({"document": content_with_metadata})
     summary_content = summary_result.content
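
---

Follow-up note on the TODO in the commit message (a separate class doing
long-form summarization via chunking instead of truncation). Below is a
minimal illustrative sketch, not part of this patch: ChunkedSummarizer and
its method names are hypothetical, and it assumes user_llm is the same
LangChain-style chat model (exposing ainvoke) used by
generate_document_summary, reusing litellm's token_counter for sizing.

# Hypothetical sketch -- not project code. Splits the document into
# token-bounded chunks, summarizes each (map), then summarizes the
# combined partial summaries (reduce).
from litellm import token_counter


class ChunkedSummarizer:
    def __init__(self, user_llm, model_name: str, chunk_tokens: int = 4000):
        self.user_llm = user_llm          # LangChain-style chat model (has ainvoke)
        self.model_name = model_name      # Model name for token counting
        self.chunk_tokens = chunk_tokens  # Token budget per chunk

    def _split_into_chunks(self, content: str) -> list[str]:
        """Greedily cut character prefixes, halving each cut until it fits the budget."""
        chunks, start = [], 0
        while start < len(content):
            # Optimistic first guess: ~4 chars per token.
            end = min(len(content), start + self.chunk_tokens * 4)
            while (
                end - start > 1
                and token_counter(model=self.model_name, text=content[start:end])
                > self.chunk_tokens
            ):
                end = start + (end - start) // 2  # halve until it fits
            chunks.append(content[start:end])
            start = end
        return chunks

    async def _summarize_one(self, text: str) -> str:
        result = await self.user_llm.ainvoke(
            f"Summarize the following document section:\n\n{text}"
        )
        return result.content

    async def summarize(self, content: str) -> str:
        chunks = self._split_into_chunks(content)
        if len(chunks) == 1:
            return await self._summarize_one(chunks[0])
        # Map: summarize each chunk independently.
        partials = [await self._summarize_one(chunk) for chunk in chunks]
        # Reduce: fold the partial summaries into one final summary.
        return await self._summarize_one("\n\n".join(partials))

Usage would be something like
`summary = await ChunkedSummarizer(user_llm, model_name).summarize(content)`.
A production version would binary-search each cut point (as this patch does
for truncation), split on sentence boundaries rather than raw characters, and
recurse the reduce step when the joined partials still exceed the window.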