mirror of
https://github.com/supermemoryai/supermemory.git
synced 2026-05-18 14:48:12 +00:00
Some checks failed
Publish AI SDK / publish (push) Has been cancelled
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
877 lines
28 KiB
Text
877 lines
28 KiB
Text
---
|
|
title: "Document Q&A System"
|
|
description: "Build a chatbot that answers questions from your documents with citations and source references"
|
|
---
|
|
|
|
Create a powerful document Q&A system that can ingest PDFs, text files, and web pages, then answer questions with accurate citations. Perfect for documentation sites, research databases, or internal knowledge bases.
|
|
|
|
## What You'll Build
|
|
|
|
A document Q&A system that:
|
|
- **Ingests multiple file types** (PDFs, DOCX, text, URLs)
|
|
- **Answers questions accurately** with source citations
|
|
- **Provides source references** with document titles and relevance scores
|
|
- **Handles follow-up questions** with conversation context
|
|
- **Supports multiple document collections** for different topics
|
|
|
|
## Prerequisites
|
|
|
|
- Node.js 18+ or Python 3.8+
|
|
- Supermemory API key
|
|
- OpenAI API key
|
|
- Basic understanding of file handling
|
|
|
|
## Implementation
|
|
|
|
### Step 1: Document Processing System
|
|
|
|
<Tabs>
|
|
<Tab title="Next.js">
|
|
```typescript lib/document-processor.ts
|
|
import { Supermemory } from 'supermemory'
|
|
|
|
const client = new Supermemory({
|
|
apiKey: process.env.SUPERMEMORY_API_KEY!
|
|
})
|
|
|
|
/** Arguments accepted by `DocumentProcessor.uploadDocument`. */
interface DocumentUpload {
  /** File appended to the multipart upload body. */
  file: File;
  /** Collection name; sent as the single entry of `containerTags`. */
  collection: string;
  /** Extra key/value pairs spread over the generated upload metadata. */
  metadata?: Record<string, any>;
}
|
|
|
|
/**
 * Helper for adding documents to a Supermemory collection and reading them back.
 *
 * NOTE(review): `uploadDocument` posts to the relative `/api/upload-document`
 * route, while the other methods call the module-level Supermemory `client`
 * directly. Confirm which runtime (browser or server) each method is meant to
 * run in before importing this class from a client component.
 */
export class DocumentProcessor {
  /**
   * Sends a file to `/api/upload-document` as multipart form data.
   *
   * The collection is sent as a one-element `containerTags` JSON array, and the
   * metadata is sent as JSON with `originalName`, `fileType` and `uploadedAt`
   * filled in; caller-supplied keys are spread last and therefore win.
   *
   * @returns The parsed JSON body of the route's response.
   * @throws Error when the response status is not OK; the error is logged and rethrown.
   */
  async uploadDocument({ file, collection, metadata = {} }: DocumentUpload) {
    try {
      const enrichedMetadata = {
        originalName: file.name,
        fileType: file.type,
        uploadedAt: new Date().toISOString(),
        ...metadata
      }

      const payload = new FormData()
      payload.append('file', file)
      payload.append('containerTags', JSON.stringify([collection]))
      payload.append('metadata', JSON.stringify(enrichedMetadata))

      const res = await fetch('/api/upload-document', { method: 'POST', body: payload })
      if (res.ok) {
        return await res.json()
      }

      throw new Error(`Upload failed: ${res.statusText}`)
    } catch (error) {
      console.error('Document upload error:', error)
      throw error
    }
  }

  /**
   * Adds a URL to the collection through `client.add`, using the URL itself as
   * the content and tagging it with `type: 'url'`, `originalUrl` and `uploadedAt`.
   *
   * @returns Whatever `client.add` resolves to.
   * @throws Rethrows any error from the client after logging it.
   */
  async uploadURL({ url, collection, metadata = {} }: { url: string, collection: string, metadata?: Record<string, any> }) {
    try {
      const urlMetadata = {
        type: 'url',
        originalUrl: url,
        uploadedAt: new Date().toISOString(),
        ...metadata
      }

      return await client.add({
        content: url,
        containerTag: collection,
        metadata: urlMetadata
      })
    } catch (error) {
      console.error('URL upload error:', error)
      throw error
    }
  }

  /**
   * Fetches one document and returns its id, status, title and the `progress`
   * value from its metadata (0 when that value is missing or falsy).
   *
   * @throws Rethrows any error from the client after logging it.
   */
  async getDocumentStatus(documentId: string) {
    try {
      const { id, status, title, metadata } = await client.documents.get(documentId)
      return { id, status, title, progress: metadata?.progress || 0 }
    } catch (error) {
      console.error('Status check error:', error)
      throw error
    }
  }

  /**
   * Lists up to 50 documents tagged with `collection`, sorted by `updatedAt`
   * descending, mapped to the fields the UI displays.
   *
   * @throws Rethrows any error from the client after logging it.
   */
  async listDocuments(collection: string) {
    try {
      const page = await client.documents.list({
        containerTags: [collection],
        limit: 50,
        sort: 'updatedAt',
        order: 'desc'
      })

      return page.memories.map(({ id, title, status, metadata }) => ({
        id,
        // Fall back from the stored title to the uploaded file name.
        title: title || metadata?.originalName || 'Untitled',
        // File uploads carry `fileType`; URL uploads carry `type`.
        type: metadata?.fileType || metadata?.type || 'unknown',
        uploadedAt: metadata?.uploadedAt,
        status,
        url: metadata?.originalUrl
      }))
    } catch (error) {
      console.error('List documents error:', error)
      throw error
    }
  }
}
|
|
```
|
|
|
|
```typescript app/api/upload-document/route.ts
|
|
import { NextRequest, NextResponse } from 'next/server'
|
|
import { Supermemory } from 'supermemory'
|
|
|
|
const client = new Supermemory({
|
|
apiKey: process.env.SUPERMEMORY_API_KEY!
|
|
})
|
|
|
|
/**
 * Handles `POST /api/upload-document`.
 *
 * Expects multipart form data with a `file` entry plus optional JSON-encoded
 * `containerTags` and `metadata` entries, and forwards them to
 * `client.documents.uploadFile`.
 *
 * Responses:
 * - 200 `{ success, documentId, message }` when the upload succeeds.
 * - 400 when `file` is missing or not a file, or when `containerTags` or
 *   `metadata` is not valid JSON.
 * - 500 `{ error, details }` for any other failure.
 */
export async function POST(request: NextRequest) {
  try {
    const formData = await request.formData()

    // `formData.get` returns a File, a string, or null. Check the runtime value
    // instead of asserting the type, so a plain text field is rejected too.
    const file = formData.get('file')
    if (!file || typeof file === 'string') {
      return NextResponse.json({ error: 'No file provided' }, { status: 400 })
    }

    const rawTags = formData.get('containerTags')
    const rawMetadata = formData.get('metadata')

    // Malformed JSON is a client error, so report it as 400 rather than letting
    // it fall through to the generic 500 handler below.
    let containerTags: unknown
    let metadata: unknown
    try {
      containerTags = JSON.parse(typeof rawTags === 'string' && rawTags ? rawTags : '[]')
      metadata = JSON.parse(typeof rawMetadata === 'string' && rawMetadata ? rawMetadata : '{}')
    } catch {
      return NextResponse.json(
        { error: 'containerTags and metadata must be valid JSON' },
        { status: 400 }
      )
    }

    // The values are re-serialized because `uploadFile` is given JSON strings.
    const result = await client.documents.uploadFile({
      file: file,
      containerTags: JSON.stringify(containerTags),
      metadata: JSON.stringify(metadata)
    })

    return NextResponse.json({
      success: true,
      documentId: result.id,
      message: 'Document uploaded successfully'
    })
  } catch (error) {
    console.error('Upload error:', error)
    // Catch variables are `unknown` under strict settings; narrow before reading.
    const details = error instanceof Error ? error.message : String(error)
    return NextResponse.json(
      { error: 'Upload failed', details },
      { status: 500 }
    )
  }
}
|
|
```
|
|
</Tab>
|
|
|
|
<Tab title="Python">
|
|
```python document_processor.py
|
|
from supermemory import Supermemory
|
|
import os
|
|
import json
|
|
from typing import Dict, List, Any, Optional
|
|
import requests
|
|
from datetime import datetime
|
|
|
|
class DocumentProcessor:
    """Uploads files and URLs to a Supermemory collection and reads them back."""

    def __init__(self):
        # The API key is read from the SUPERMEMORY_API_KEY environment variable.
        self.client = Supermemory(api_key=os.getenv("SUPERMEMORY_API_KEY"))

    def upload_file(self, file_path: str, collection: str, metadata: Optional[Dict[str, Any]] = None) -> Dict:
        """Upload a local file to Supermemory.

        Args:
            file_path: Path of the file to open in binary mode and upload.
            collection: Value passed as ``container_tags``.
            metadata: Extra keys merged over the generated ``originalName``,
                ``fileType`` (the file extension) and ``uploadedAt`` entries.

        Returns:
            Whatever ``client.documents.upload_file`` returns.

        Raises:
            Exception: Any error from opening the file or from the client is
                printed and re-raised.
        """
        if metadata is None:
            metadata = {}

        try:
            with open(file_path, 'rb') as file:
                result = self.client.documents.upload_file(
                    file=file,
                    container_tags=collection,
                    # Metadata is sent as a JSON string for file uploads.
                    metadata=json.dumps({
                        'originalName': os.path.basename(file_path),
                        'fileType': os.path.splitext(file_path)[1],
                        'uploadedAt': datetime.now().isoformat(),
                        **metadata
                    })
                )
                return result
        except Exception as e:
            print(f"File upload error: {e}")
            raise

    def upload_url(self, url: str, collection: str, metadata: Optional[Dict[str, Any]] = None) -> Dict:
        """Upload URL content to Supermemory.

        The URL itself is passed as the content, tagged with ``type: 'url'``,
        ``originalUrl`` and ``uploadedAt``; caller-supplied keys are merged last.

        Raises:
            Exception: Any client error is printed and re-raised.
        """
        if metadata is None:
            metadata = {}

        try:
            result = self.client.add(
                content=url,
                container_tag=collection,
                metadata={
                    'type': 'url',
                    'originalUrl': url,
                    'uploadedAt': datetime.now().isoformat(),
                    **metadata
                }
            )
            return result
        except Exception as e:
            print(f"URL upload error: {e}")
            raise

    def get_document_status(self, document_id: str) -> Dict:
        """Check document processing status.

        Returns:
            A dict with ``id``, ``status``, ``title`` and ``progress``;
            ``progress`` is 0 when the document has no metadata or no
            ``progress`` key.

        Raises:
            Exception: Any client error is printed and re-raised.
        """
        try:
            memory = self.client.documents.get(document_id)
            return {
                'id': memory.id,
                'status': memory.status,
                'title': memory.title,
                'progress': memory.metadata.get('progress', 0) if memory.metadata else 0
            }
        except Exception as e:
            print(f"Status check error: {e}")
            raise

    def list_documents(self, collection: str) -> List[Dict]:
        """List up to 50 documents in a collection, most recently updated first.

        Returns:
            One dict per document with ``id``, ``title``, ``type``,
            ``uploadedAt``, ``status`` and ``url``.

        Raises:
            Exception: Any client error is printed and re-raised.
        """
        try:
            memories = self.client.documents.list(
                container_tags=[collection],
                limit=50,
                sort='updatedAt',
                order='desc'
            )

            documents = []
            for memory in memories.memories:
                # Treat missing metadata as empty. The previous
                # `a or b or 'Untitled' if memory.metadata else 'Untitled'`
                # form parsed as `(a or b or ...) if ... else ...`, so a
                # document with a title but no metadata was listed as "Untitled".
                meta = memory.metadata or {}
                documents.append({
                    'id': memory.id,
                    'title': memory.title or meta.get('originalName') or 'Untitled',
                    # File uploads carry 'fileType'; URL uploads carry 'type'.
                    'type': meta.get('fileType') or meta.get('type') or 'unknown',
                    'uploadedAt': meta.get('uploadedAt'),
                    'status': memory.status,
                    'url': meta.get('originalUrl')
                })
            return documents
        except Exception as e:
            print(f"List documents error: {e}")
            raise
|
|
```
|
|
</Tab>
|
|
</Tabs>
|
|
|
|
### Step 2: Q&A API with Citations
|
|
|
|
<Tabs>
|
|
<Tab title="Next.js API Route">
|
|
```typescript app/api/qa/route.ts
|
|
import { streamText } from 'ai'
|
|
import { createOpenAI } from '@ai-sdk/openai'
|
|
import { Supermemory } from 'supermemory'
|
|
|
|
const openai = createOpenAI({
|
|
apiKey: process.env.OPENAI_API_KEY!
|
|
})
|
|
|
|
const client = new Supermemory({
|
|
apiKey: process.env.SUPERMEMORY_API_KEY!
|
|
})
|
|
|
|
/**
 * Handles `POST /api/qa`: searches the collection for the question, builds a
 * numbered document context, and streams a model answer that cites it.
 *
 * Accepted JSON bodies:
 * - `{ question, collection, conversationHistory? }`, or
 * - `{ messages, collection }` as posted by `useChat`; the last message is
 *   used as the question and the earlier ones as the conversation history.
 *
 * Responses:
 * - 400 when `question` or `collection` is missing or empty.
 * - JSON `{ answer, sources: [], confidence: 0 }` when the search finds nothing.
 * - A streamed answer otherwise.
 * - 500 `{ error, details }` on failure, including an unparseable body.
 */
export async function POST(request: Request) {
  try {
    // Parsed inside the try block so a malformed body yields the JSON error
    // response below instead of an unhandled rejection.
    const body = await request.json()

    const chatMessages = Array.isArray(body?.messages) ? body.messages : []
    const usesChatPayload = typeof body?.question !== 'string' && chatMessages.length > 0

    const question = usesChatPayload
      ? chatMessages[chatMessages.length - 1]?.content
      : body?.question
    const conversationHistory = usesChatPayload
      ? chatMessages.slice(0, -1)
      : (body?.conversationHistory ?? [])
    const collection = body?.collection

    if (
      typeof question !== 'string' ||
      question.trim() === '' ||
      typeof collection !== 'string' ||
      collection === ''
    ) {
      return Response.json(
        { error: 'Both "question" and "collection" are required' },
        { status: 400 }
      )
    }

    // Search for relevant documents
    const searchResults = await client.search.documents({
      q: question,
      containerTags: [collection],
      limit: 8,
      rerank: true,
      includeFullDocs: false,
      includeSummary: true,
      onlyMatchingChunks: false,
      chunkThreshold: 0.7
    })

    if (searchResults.results.length === 0) {
      return Response.json({
        answer: "I couldn't find any relevant information in the uploaded documents to answer your question.",
        sources: [],
        confidence: 0
      })
    }

    // Prepare context from search results: up to three relevant chunks per
    // document, labelled with the same 1-based number used in `sources`.
    const context = searchResults.results.map((result, index) => {
      const chunks = result.chunks
        .filter(chunk => chunk.isRelevant)
        .slice(0, 3)
        .map(chunk => chunk.content)
        .join('\n\n')

      return `[Document ${index + 1}: "${result.title}"]\n${chunks}`
    }).join('\n\n---\n\n')

    // Prepare sources for citation
    const sources = searchResults.results.map((result, index) => ({
      id: result.documentId,
      title: result.title,
      type: result.type,
      relevantChunks: result.chunks.filter(chunk => chunk.isRelevant).length,
      score: result.score,
      citationNumber: index + 1
    }))

    const messages = [
      ...conversationHistory,
      {
        role: 'user' as const,
        content: question
      }
    ]

    const result = await streamText({
      model: openai('gpt-5'),
      messages,
      system: `You are a helpful document Q&A assistant. Answer questions based ONLY on the provided document context.

CONTEXT FROM DOCUMENTS:
${context}

INSTRUCTIONS:
1. Answer the question using ONLY the information from the provided documents
2. Include specific citations in your response using [Document X] format
3. If the documents don't contain enough information, say so clearly
4. Be accurate and quote directly when possible
5. If multiple documents support a point, cite all relevant ones
6. Maintain a helpful, professional tone

CITATION FORMAT:
- Use [Document 1], [Document 2], etc. to cite sources
- Place citations after the relevant information
- Example: "The process involves three steps [Document 1]. However, some experts recommend a four-step approach [Document 3]."

If the question cannot be answered from the provided documents, respond with: "I don't have enough information in the provided documents to answer this question accurately."`,
      temperature: 0.1,
      maxTokens: 1000
    })

    // NOTE(review): confirm that the installed AI SDK version accepts a plain
    // object for `data` here and delivers it to the client.
    return result.toAIStreamResponse({
      data: {
        sources,
        searchResultsCount: searchResults.results.length,
        totalResults: searchResults.total
      }
    })
  } catch (error) {
    console.error('Q&A error:', error)
    // Catch variables are `unknown` under strict settings; narrow before reading.
    const details = error instanceof Error ? error.message : String(error)
    return Response.json(
      { error: 'Failed to process question', details },
      { status: 500 }
    )
  }
}
|
|
```
|
|
</Tab>
|
|
|
|
<Tab title="Python FastAPI">
|
|
```python qa_api.py
|
|
from fastapi import FastAPI, HTTPException
|
|
from fastapi.responses import StreamingResponse
|
|
from pydantic import BaseModel
|
|
from typing import List, Dict, Any, Optional
|
|
import openai
|
|
from supermemory import Supermemory
|
|
import json
|
|
import os
|
|
|
|
app = FastAPI()
|
|
|
|
openai_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
supermemory_client = Supermemory(api_key=os.getenv("SUPERMEMORY_API_KEY"))
|
|
|
|
class QARequest(BaseModel):
    """Request body accepted by the ``/qa`` endpoint."""

    # Question text; used as the search query and as the final user message.
    question: str
    # Collection name; passed to the search as its only container tag.
    collection: str
    # Earlier chat messages, placed between the system prompt and the question.
    conversationHistory: List[Dict[str, str]] = []
|
|
|
|
class QAResponse(BaseModel):
    """Response body returned by the ``/qa`` endpoint."""

    # Model-generated answer, or a fixed message when nothing was found.
    answer: str
    # One entry per search result, each carrying its citation number.
    sources: List[Dict[str, Any]]
    # Derived from the score of the first search result, capped at 1.0.
    confidence: float
    # Number of search results that were used to build the context.
    searchResultsCount: int
|
|
|
|
@app.post("/qa")
async def answer_question(request: QARequest):
    """Answer a question from the documents in a collection, with citations.

    Searches the collection, builds a numbered context from up to three
    relevant chunks per document, and asks the chat model to answer using
    only that context.

    Returns:
        QAResponse with the answer, the source list, a confidence value taken
        from the first search result's score (capped at 1.0), and the number
        of search results. When the search finds nothing, a fixed answer with
        no sources and zero confidence is returned.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    # Imported locally because the module's import block only brings in
    # FastAPI and HTTPException from the fastapi package.
    from fastapi.concurrency import run_in_threadpool

    try:
        # Search for relevant documents. This client call is made without
        # `await`, so it is run in the threadpool to avoid blocking the event
        # loop while the search is in flight.
        search_results = await run_in_threadpool(
            supermemory_client.search.documents,
            q=request.question,
            container_tags=[request.collection],
            limit=8,
            rerank=True,
            include_full_docs=False,
            include_summary=True,
            only_matching_chunks=False,
            chunk_threshold=0.7
        )

        if not search_results.results:
            return QAResponse(
                answer="I couldn't find any relevant information in the uploaded documents to answer your question.",
                sources=[],
                confidence=0,
                searchResultsCount=0
            )

        # Prepare context from search results
        context_parts = []
        sources = []

        for index, result in enumerate(search_results.results):
            relevant_chunks = [
                chunk.content for chunk in result.chunks
                if chunk.is_relevant
            ]

            # Only the first three relevant chunks go into the prompt; the
            # full count is still reported in the source entry.
            chunk_text = '\n\n'.join(relevant_chunks[:3])
            context_parts.append(f'[Document {index + 1}: "{result.title}"]\n{chunk_text}')

            sources.append({
                'id': result.document_id,
                'title': result.title,
                'type': result.type,
                'relevantChunks': len(relevant_chunks),
                'score': result.score,
                'citationNumber': index + 1
            })

        context = '\n\n---\n\n'.join(context_parts)

        # Prepare messages
        messages = [
            {
                "role": "system",
                "content": f"""You are a helpful document Q&A assistant. Answer questions based ONLY on the provided document context.

CONTEXT FROM DOCUMENTS:
{context}

INSTRUCTIONS:
1. Answer the question using ONLY the information from the provided documents
2. Include specific citations in your response using [Document X] format
3. If the documents don't contain enough information, say so clearly
4. Be accurate and quote directly when possible
5. If multiple documents support a point, cite all relevant ones
6. Maintain a helpful, professional tone

CITATION FORMAT:
- Use [Document 1], [Document 2], etc. to cite sources
- Place citations after the relevant information
- Example: "The process involves three steps [Document 1]. However, some experts recommend a four-step approach [Document 3]."

If the question cannot be answered from the provided documents, respond with: "I don't have enough information in the provided documents to answer this question accurately." """
            }
        ]

        # Add conversation history
        messages.extend(request.conversationHistory)
        messages.append({"role": "user", "content": request.question})

        # Get AI response
        response = await openai_client.chat.completions.create(
            model="gpt-5",
            messages=messages,
            temperature=0.1,
            max_tokens=1000
        )

        answer = response.choices[0].message.content

        return QAResponse(
            answer=answer,
            sources=sources,
            # Results are known to be non-empty here (see the early return above).
            confidence=min(search_results.results[0].score, 1.0),
            searchResultsCount=len(search_results.results)
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to process question: {str(e)}")
|
|
|
|
if __name__ == "__main__":
|
|
import uvicorn
|
|
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
```
|
|
</Tab>
|
|
</Tabs>
|
|
|
|
### Step 3: Frontend Interface
|
|
|
|
```tsx app/qa/page.tsx
|
|
'use client'
|
|
|
|
import { useEffect, useRef, useState } from 'react'
import { useChat } from 'ai/react'
import { DocumentProcessor } from '@/lib/document-processor'
|
|
|
|
/** One entry of the document list shown in the collection panel. */
interface Document {
  /** Used as the React key for the list row. */
  id: string;
  /** Shown as the row's main label. */
  title: string;
  /** Shown next to the status in the row's secondary line. */
  type: string;
  /** Shown next to the type in the row's secondary line. */
  status: string;
  uploadedAt: string;
}
|
|
|
|
/** One cited document shown in the "Sources" box under an answer. */
interface Source {
  /** Used as the React key for the source row. */
  id: string;
  title: string;
  /** Rendered as "Document N" to match the citations in the answer text. */
  citationNumber: number;
  /** Rendered as a percentage (`score * 100`, one decimal place). */
  score: number;
  /** Rendered as "N relevant chunks". */
  relevantChunks: number;
}
|
|
|
|
/**
 * Two-panel page: a document collection panel (collection name, file upload,
 * document list) and a chat panel that asks questions through `/api/qa`.
 *
 * The question input is disabled while no documents are listed, so the list is
 * loaded once on mount in addition to the manual "Refresh Documents" button.
 */
export default function DocumentQA() {
  const [collection, setCollection] = useState('default-docs')
  const [documents, setDocuments] = useState<Document[]>([])
  const [sources, setSources] = useState<Source[]>([])
  const [isUploading, setIsUploading] = useState(false)
  const [uploadProgress, setUploadProgress] = useState<Record<string, number>>({})
  const fileInputRef = useRef<HTMLInputElement>(null)

  // Lazy initial state keeps a single processor instance across renders
  // instead of constructing a new one on every render.
  const [processor] = useState(() => new DocumentProcessor())

  const { messages, input, handleInputChange, handleSubmit, isLoading } = useChat({
    api: '/api/qa',
    body: {
      collection
    },
    // NOTE(review): confirm that the installed `ai/react` version passes
    // `data` as the second argument to `onFinish`.
    onFinish: (message, { data }) => {
      if (data?.sources) {
        setSources(data.sources)
      }
    }
  })

  const loadDocuments = async () => {
    try {
      const docs = await processor.listDocuments(collection)
      setDocuments(docs)
    } catch (error) {
      console.error('Failed to load documents:', error)
    }
  }

  // Load the list once on mount. It is deliberately not re-run when the
  // collection name changes, because that input updates on every keystroke;
  // use the refresh button after switching collections.
  useEffect(() => {
    void loadDocuments()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  const handleFileUpload = async (event: React.ChangeEvent<HTMLInputElement>) => {
    const files = event.target.files
    if (!files || files.length === 0) return

    setIsUploading(true)
    const newProgress: Record<string, number> = {}

    try {
      // Files are uploaded one at a time; progress is either 0 or 100 per file.
      for (const file of Array.from(files)) {
        newProgress[file.name] = 0
        setUploadProgress({ ...newProgress })

        await processor.uploadDocument({
          file,
          collection,
          metadata: {
            uploadedBy: 'user',
            category: 'qa-document'
          }
        })

        newProgress[file.name] = 100
        setUploadProgress({ ...newProgress })
      }

      // Refresh document list
      await loadDocuments()

      // Clear file input
      if (fileInputRef.current) {
        fileInputRef.current.value = ''
      }
    } catch (error) {
      console.error('Upload failed:', error)
      // Catch variables are `unknown` under strict settings; narrow before reading.
      const reason = error instanceof Error ? error.message : String(error)
      alert('Upload failed: ' + reason)
    } finally {
      setIsUploading(false)
      setUploadProgress({})
    }
  }

  const formatSources = (sources: Source[]) => {
    if (!sources || sources.length === 0) return null

    return (
      <div className="mt-4 p-4 bg-gray-50 border border-gray-200 rounded-lg">
        <h3 className="text-sm font-semibold text-gray-700 mb-2">Sources:</h3>
        <div className="space-y-2">
          {sources.map((source) => (
            <div key={source.id} className="flex items-center space-x-2 text-sm">
              <span className="bg-blue-100 text-blue-800 px-2 py-1 rounded text-xs font-mono">
                Document {source.citationNumber}
              </span>
              <span className="text-gray-700">{source.title}</span>
              <span className="text-gray-500">
                ({source.relevantChunks} relevant chunks, {(source.score * 100).toFixed(1)}% match)
              </span>
            </div>
          ))}
        </div>
      </div>
    )
  }

  // `sources` holds the citations of the most recent answer only, so they are
  // attached to the latest assistant message rather than to every answer.
  const latestAssistantId = [...messages].reverse().find((m) => m.role === 'assistant')?.id

  return (
    <div className="max-w-6xl mx-auto p-6">
      <div className="grid grid-cols-1 lg:grid-cols-3 gap-6">
        {/* Document Management Panel */}
        <div className="lg:col-span-1">
          <div className="bg-white border border-gray-200 rounded-lg p-6">
            <h2 className="text-lg font-semibold mb-4">Document Collection</h2>

            {/* Collection Selector */}
            <div className="mb-4">
              <label className="block text-sm font-medium text-gray-700 mb-2">
                Collection Name
              </label>
              <input
                type="text"
                value={collection}
                onChange={(e) => setCollection(e.target.value)}
                className="w-full p-2 border border-gray-300 rounded focus:ring-2 focus:ring-blue-500 focus:border-blue-500"
                placeholder="e.g., company-docs"
              />
            </div>

            {/* File Upload */}
            <div className="mb-4">
              <input
                ref={fileInputRef}
                type="file"
                multiple
                accept=".pdf,.docx,.txt,.md"
                onChange={handleFileUpload}
                className="hidden"
              />
              <button
                onClick={() => fileInputRef.current?.click()}
                disabled={isUploading}
                className="w-full p-3 border-2 border-dashed border-gray-300 rounded-lg hover:border-blue-400 focus:ring-2 focus:ring-blue-500 disabled:opacity-50"
              >
                {isUploading ? 'Uploading...' : 'Upload Documents'}
              </button>
            </div>

            {/* Upload Progress */}
            {Object.keys(uploadProgress).length > 0 && (
              <div className="mb-4 space-y-2">
                {Object.entries(uploadProgress).map(([filename, progress]) => (
                  <div key={filename} className="text-sm">
                    <div className="flex justify-between">
                      <span className="truncate">{filename}</span>
                      <span>{progress}%</span>
                    </div>
                    <div className="w-full bg-gray-200 rounded-full h-2">
                      <div
                        className="bg-blue-600 h-2 rounded-full transition-all duration-300"
                        style={{ width: `${progress}%` }}
                      />
                    </div>
                  </div>
                ))}
              </div>
            )}

            {/* Document List */}
            <div className="max-h-64 overflow-y-auto">
              {documents.map((doc) => (
                <div key={doc.id} className="mb-2 p-2 bg-gray-50 rounded text-sm">
                  <div className="font-medium truncate">{doc.title}</div>
                  <div className="text-gray-500 text-xs">
                    {doc.type} • {doc.status}
                  </div>
                </div>
              ))}
            </div>

            <button
              onClick={loadDocuments}
              className="w-full mt-4 px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600"
            >
              Refresh Documents
            </button>
          </div>
        </div>

        {/* Q&A Interface */}
        <div className="lg:col-span-2">
          <div className="bg-white border border-gray-200 rounded-lg p-6">
            <h2 className="text-lg font-semibold mb-4">Ask Questions</h2>

            {/* Messages */}
            <div className="h-96 overflow-y-auto mb-4 space-y-4">
              {messages.length === 0 && (
                <div className="text-gray-500 text-center py-8">
                  Upload documents and ask questions to get started!

                  <div className="mt-4 text-sm">
                    <p className="font-medium">Try asking:</p>
                    <ul className="mt-2 space-y-1">
                      <li>"What are the main findings?"</li>
                      <li>"Summarize the key points"</li>
                      <li>"What does section 3 say about...?"</li>
                    </ul>
                  </div>
                </div>
              )}

              {messages.map((message) => (
                <div
                  key={message.id}
                  className={`p-4 rounded-lg ${
                    message.role === 'user'
                      ? 'bg-blue-500 text-white ml-8'
                      : 'bg-gray-100 mr-8'
                  }`}
                >
                  <div className="whitespace-pre-wrap">{message.content}</div>

                  {/* While a new answer is streaming, `sources` still belongs
                      to the previous answer, so it is hidden until loading ends. */}
                  {message.role === 'assistant' &&
                    message.id === latestAssistantId &&
                    !isLoading &&
                    sources.length > 0 && (
                      formatSources(sources)
                    )}
                </div>
              ))}

              {isLoading && (
                <div className="bg-gray-100 p-4 rounded-lg mr-8">
                  <div className="flex items-center space-x-2">
                    <div className="animate-spin rounded-full h-4 w-4 border-b-2 border-blue-600"></div>
                    <span>Searching documents and generating answer...</span>
                  </div>
                </div>
              )}
            </div>

            {/* Input */}
            <form onSubmit={handleSubmit} className="flex gap-2">
              <input
                value={input}
                onChange={handleInputChange}
                placeholder="Ask a question about your documents..."
                className="flex-1 p-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-blue-500"
                disabled={isLoading || documents.length === 0}
              />
              <button
                type="submit"
                disabled={isLoading || !input.trim() || documents.length === 0}
                className="px-6 py-3 bg-blue-500 text-white rounded-lg hover:bg-blue-600 disabled:opacity-50 disabled:cursor-not-allowed"
              >
                Ask
              </button>
            </form>

            {documents.length === 0 && (
              <p className="text-sm text-gray-500 mt-2">
                Upload documents first to enable questions
              </p>
            )}
          </div>
        </div>
      </div>
    </div>
  )
}
|
|
```
|
|
|
|
## Testing Your Q&A System
|
|
|
|
### Step 4: Test Document Processing
|
|
|
|
1. **Upload Test Documents**:
|
|
- Upload a PDF manual or research paper
|
|
- Add a few web articles via URL
|
|
- Upload some text files with different topics
|
|
|
|
2. **Test Question Types**:
|
|
```
|
|
Factual: "What is the definition of X mentioned in the documents?"
|
|
Analytical: "What are the pros and cons of approach Y?"
|
|
Comparative: "How does method A compare to method B?"
|
|
Summarization: "Summarize the main findings"
|
|
```
|
|
|
|
3. **Verify Citations**:
|
|
- Check that citations appear in responses
|
|
- Verify citation numbers match source list
|
|
- Ensure sources show relevant metadata
|
|
|
|
## Production Considerations
|
|
|
|
### Performance Optimization
|
|
|
|
```typescript
|
|
// Implement caching for frequently asked questions
|
|
const cacheKey = `qa:${collection}:${hashQuery(question)}`
|
|
const cachedResponse = await redis.get(cacheKey)
|
|
|
|
if (cachedResponse) {
|
|
return JSON.parse(cachedResponse)
|
|
}
|
|
|
|
// Cache response for 1 hour
|
|
await redis.setex(cacheKey, 3600, JSON.stringify(response))
|
|
```
|
|
|
|
### Advanced Features
|
|
|
|
1. **Follow-up Questions**:
|
|
```typescript
|
|
// Track conversation context
|
|
const conversationHistory = messages.slice(-6) // Last 3 exchanges
|
|
```
|
|
|
|
2. **Answer Confidence Scoring**:
|
|
```typescript
|
|
const confidence = calculateConfidence({
|
|
searchScore: searchResults.results[0]?.score || 0,
|
|
resultCount: searchResults.results.length,
|
|
chunkRelevance: avgChunkRelevance
|
|
})
|
|
```
|
|
|
|
3. **Multi-language Support**:
|
|
```typescript
|
|
// Detect the question's language and restrict the search to documents tagged with it
|
|
const detectedLanguage = await detectLanguage(question)
|
|
const searchResults = await client.search.documents({
|
|
q: question,
|
|
filters: {
|
|
AND: [{ key: 'language', value: detectedLanguage }]
|
|
}
|
|
})
|
|
```
|
|
|
|
This recipe provides a complete foundation for building document Q&A systems with accurate citations and source tracking.
|
|
|
|
---
|
|
|
|
*Customize this recipe based on your specific document types and use cases.*
|