mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-03 19:19:10 +00:00
feat: Added Precision Search + Multiple Webpage Sessions Chat
This commit is contained in:
parent
24a8ebbb34
commit
dfeeb58ca2
3 changed files with 163 additions and 19 deletions
|
@ -85,7 +85,7 @@ CYPHER_GENERATION_PROMPT = PromptTemplate(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
DOC_DESCRIPTION_TEMPLATE = """Task:Give Detailed Description of the page content of the given document.
|
DOC_DESCRIPTION_TEMPLATE = DATE_TODAY + """Task:Give Detailed Description of the page content of the given document.
|
||||||
Instructions:
|
Instructions:
|
||||||
Provide as much details about metadata & page content as if you need to give human readable report of this Browsing session event.
|
Provide as much details about metadata & page content as if you need to give human readable report of this Browsing session event.
|
||||||
|
|
||||||
|
@ -104,4 +104,24 @@ DOCUMENT_METADATA_EXTRACTION_PROMT = ChatPromptTemplate.from_messages([("system"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Prompt that asks the LLM to rewrite a user question into a search
# instruction for the vector index, given sample documents. DATE_TODAY is
# prepended so the model can resolve relative dates in the query.
# NOTE(review): blank-line layout inside the template was reconstructed from
# a diff rendering — confirm against the original file.
VECTOR_QUERY_GENERATION_TEMPLATE = DATE_TODAY + """You are a helpful assistant. You are given a user query and the examples of document on which user is asking query about.
Give instruction to machine how to search for the data based on user query.

Document Examples:
{examples}

Note: Only return the Query and nothing else. No explanation.

User Query: {query}
Helpful Answer:"""

# PromptTemplate wrapper exposing the template's two input variables.
VECTOR_QUERY_GENERATION_PROMT = PromptTemplate(
    input_variables=["examples", "query"], template=VECTOR_QUERY_GENERATION_TEMPLATE
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,18 +1,6 @@
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
class UserQuery(BaseModel):
|
|
||||||
query: str
|
|
||||||
neourl: str
|
|
||||||
neouser: str
|
|
||||||
neopass: str
|
|
||||||
openaikey: str
|
|
||||||
apisecretkey: str
|
|
||||||
|
|
||||||
class DescriptionResponse(BaseModel):
|
|
||||||
response: str
|
|
||||||
|
|
||||||
|
|
||||||
class DocMeta(BaseModel):
|
class DocMeta(BaseModel):
|
||||||
BrowsingSessionId: Optional[str] = Field(default=None, description="BrowsingSessionId of Document")
|
BrowsingSessionId: Optional[str] = Field(default=None, description="BrowsingSessionId of Document")
|
||||||
VisitedWebPageURL: Optional[str] = Field(default=None, description="VisitedWebPageURL of Document")
|
VisitedWebPageURL: Optional[str] = Field(default=None, description="VisitedWebPageURL of Document")
|
||||||
|
@ -22,6 +10,48 @@ class DocMeta(BaseModel):
|
||||||
VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"),
|
VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"),
|
||||||
VisitedWebPageContent: Optional[str] = Field(default=None, description="Visited WebPage Content in markdown of Document")
|
VisitedWebPageContent: Optional[str] = Field(default=None, description="Visited WebPage Content in markdown of Document")
|
||||||
|
|
||||||
|
class PrecisionQuery(BaseModel):
    """Filters and credentials for the /precision search endpoint."""

    # Optional exact BrowsingSessionId filter.
    sessionid: Optional[str] = Field(default=None)
    # Optional substring filter on the visited page URL.
    webpageurl: Optional[str] = Field(default=None)
    # [from, to] ISO date-time strings; compared lexicographically in Cypher.
    daterange: Optional[List[str]]
    # [min, max] visit duration in milliseconds.
    timerange: Optional[List[int]]
    # Neo4j connection settings supplied per request.
    neourl: str
    neouser: str
    neopass: str
    # OpenAI API key for any LLM calls.
    openaikey: str
    # Shared secret checked against API_SECRET_KEY for authorization.
    apisecretkey: str
|
||||||
|
|
||||||
|
class PrecisionResponse(BaseModel):
    """Documents matching a precision search, merged per browsing session."""

    documents: List[DocMeta]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class UserQuery(BaseModel):
    """Request payload for the query endpoints taking a single question."""

    # Natural-language question from the user.
    query: str
    # Neo4j connection settings supplied per request.
    neourl: str
    neouser: str
    neopass: str
    # OpenAI API key used for the LLM calls.
    openaikey: str
    # Shared secret checked against API_SECRET_KEY for authorization.
    apisecretkey: str
|
||||||
|
|
||||||
|
class ChatHistory(BaseModel):
    """One stored turn of a document-chat conversation."""

    # Message role: 'system', 'ai' or 'human' (dispatched on in /chat/docs).
    type: str
    # Plain message text, or the retrieved documents for a 'system' turn.
    content: str | List[DocMeta]
|
||||||
|
|
||||||
|
class UserQueryWithChatHistory(BaseModel):
    """Payload for /chat/docs: a new query plus the prior conversation."""

    # Prior conversation turns — presumably oldest first; TODO confirm
    # ordering against the caller.
    chat: List[ChatHistory]
    # The new user question to answer in context of the chat.
    query: str
    # Neo4j connection settings supplied per request.
    neourl: str
    neouser: str
    neopass: str
    # OpenAI API key used for the LLM calls.
    openaikey: str
    # Shared secret checked against API_SECRET_KEY for authorization.
    apisecretkey: str
|
||||||
|
|
||||||
|
|
||||||
|
class DescriptionResponse(BaseModel):
    """Single-string LLM answer returned by description/chat endpoints."""

    response: str
|
||||||
|
|
||||||
|
|
||||||
class RetrivedDocListItem(BaseModel):
    """One retrieved document: its metadata plus raw page content."""

    metadata: DocMeta
    pageContent: str
|
||||||
|
|
|
@ -6,9 +6,11 @@ from langchain_core.documents import Document
|
||||||
from langchain_openai import OpenAIEmbeddings
|
from langchain_openai import OpenAIEmbeddings
|
||||||
from langchain_community.vectorstores import Neo4jVector
|
from langchain_community.vectorstores import Neo4jVector
|
||||||
from envs import ACCESS_TOKEN_EXPIRE_MINUTES, ALGORITHM, API_SECRET_KEY, SECRET_KEY
|
from envs import ACCESS_TOKEN_EXPIRE_MINUTES, ALGORITHM, API_SECRET_KEY, SECRET_KEY
|
||||||
from prompts import CYPHER_QA_PROMPT, DOC_DESCRIPTION_PROMPT, GRAPH_QUERY_GEN_PROMPT, SIMILARITY_SEARCH_PROMPT , CYPHER_GENERATION_PROMPT, DOCUMENT_METADATA_EXTRACTION_PROMT
|
from prompts import CYPHER_QA_PROMPT, DATE_TODAY, DOC_DESCRIPTION_PROMPT, GRAPH_QUERY_GEN_PROMPT, SIMILARITY_SEARCH_PROMPT , CYPHER_GENERATION_PROMPT, DOCUMENT_METADATA_EXTRACTION_PROMT
|
||||||
from pydmodels import DescriptionResponse, UserQuery, DocMeta, RetrivedDocList, UserQueryResponse, VectorSearchQuery
|
from pydmodels import DescriptionResponse, PrecisionQuery, PrecisionResponse, UserQuery, DocMeta, RetrivedDocList, UserQueryResponse, UserQueryWithChatHistory, VectorSearchQuery
|
||||||
from langchain_experimental.text_splitter import SemanticChunker
|
from langchain_experimental.text_splitter import SemanticChunker
|
||||||
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||||
|
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
|
||||||
|
|
||||||
#Our Imps
|
#Our Imps
|
||||||
from LLMGraphTransformer import LLMGraphTransformer
|
from LLMGraphTransformer import LLMGraphTransformer
|
||||||
|
@ -32,7 +34,7 @@ app = FastAPI()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# GraphCypherQAChain
|
||||||
@app.post("/")
|
@app.post("/")
|
||||||
def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
|
def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
|
||||||
|
|
||||||
|
@ -53,9 +55,9 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
|
||||||
# Query Expansion
|
# Query Expansion
|
||||||
searchchain = GRAPH_QUERY_GEN_PROMPT | llm
|
searchchain = GRAPH_QUERY_GEN_PROMPT | llm
|
||||||
|
|
||||||
qry = searchchain.invoke({"question": data.query, "context": examples})
|
# qry = searchchain.invoke({"question": data.query, "context": examples})
|
||||||
|
|
||||||
query = qry.content
|
query = data.query #qry.content
|
||||||
|
|
||||||
embeddings = OpenAIEmbeddings(
|
embeddings = OpenAIEmbeddings(
|
||||||
model="text-embedding-ada-002",
|
model="text-embedding-ada-002",
|
||||||
|
@ -84,7 +86,7 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
|
||||||
embedding_node_property="embedding",
|
embedding_node_property="embedding",
|
||||||
)
|
)
|
||||||
|
|
||||||
docs = vector_index.similarity_search(query,k=5)
|
docs = vector_index.similarity_search(data.query,k=5)
|
||||||
|
|
||||||
docstoreturn = []
|
docstoreturn = []
|
||||||
|
|
||||||
|
@ -145,6 +147,98 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
|
||||||
|
|
||||||
return UserQueryResponse(relateddocs=docstoreturn,response=response.content)
|
return UserQueryResponse(relateddocs=docstoreturn,response=response.content)
|
||||||
|
|
||||||
|
#RETURN n LIMIT 25;
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/precision")
def get_precision_search_response(data: PrecisionQuery, response_model=PrecisionResponse):
    """Filter browsing-history documents stored in Neo4j.

    Applies the caller's date range, visit-duration range, URL substring
    and session-id filters, then merges all pages belonging to the same
    browsing session into one DocMeta (their text concatenated).

    Raises:
        HTTPException: 401 when the shared API secret does not match;
            400 when a required [from, to] range is missing or malformed.
    """
    if data.apisecretkey != API_SECRET_KEY:
        raise HTTPException(status_code=401, detail="Unauthorized")

    # The original code indexed these Optional fields blindly, turning a
    # missing range into a 500; fail fast with a clear 400 instead.
    if not data.daterange or len(data.daterange) < 2:
        raise HTTPException(status_code=400, detail="daterange must be [from, to]")
    if not data.timerange or len(data.timerange) < 2:
        raise HTTPException(status_code=400, detail="timerange must be [min, max]")

    graph = Neo4jGraph(url=data.neourl, username=data.neouser, password=data.neopass)

    # Build the Cypher query with bound parameters rather than string
    # concatenation so user-supplied values cannot inject Cypher.
    clauses = [
        "MATCH (d:Document)",
        "WHERE d.VisitedWebPageDateWithTimeInISOString >= $datefrom",
        "AND d.VisitedWebPageDateWithTimeInISOString <= $dateto",
    ]
    params = {"datefrom": data.daterange[0], "dateto": data.daterange[1]}

    if data.timerange[0] >= data.timerange[1]:
        # Inverted or empty duration range: treat as "any duration".
        clauses.append("AND d.VisitedWebPageVisitDurationInMilliseconds >= 0")
    else:
        clauses.append("AND d.VisitedWebPageVisitDurationInMilliseconds >= $tmin")
        clauses.append("AND d.VisitedWebPageVisitDurationInMilliseconds <= $tmax")
        params["tmin"] = data.timerange[0]
        params["tmax"] = data.timerange[1]

    if data.webpageurl:
        # Lower-cased substring match, preserving the original behaviour.
        clauses.append("AND d.VisitedWebPageURL CONTAINS $weburl")
        params["weburl"] = data.webpageurl.lower()

    if data.sessionid:
        clauses.append("AND d.BrowsingSessionId = $sessionid")
        params["sessionid"] = data.sessionid

    clauses.append("RETURN d;")

    graphdocs = graph.query(" ".join(clauses), params)

    # Merge every page of one browsing session into a single record: the
    # first page seen supplies the metadata, and the 'text' of later pages
    # is appended to it.
    docsDict = {}
    for row in graphdocs:
        node = row["d"]
        key = node["BrowsingSessionId"]
        if key not in docsDict:
            docsDict[key] = node
        else:
            docsDict[key]["text"] += node["text"]

    docs = [
        DocMeta(
            BrowsingSessionId=x["BrowsingSessionId"],
            VisitedWebPageURL=x["VisitedWebPageURL"],
            VisitedWebPageVisitDurationInMilliseconds=x["VisitedWebPageVisitDurationInMilliseconds"],
            VisitedWebPageTitle=x["VisitedWebPageTitle"],
            VisitedWebPageReffererURL=x["VisitedWebPageReffererURL"],
            VisitedWebPageDateWithTimeInISOString=x["VisitedWebPageDateWithTimeInISOString"],
            VisitedWebPageContent=x["text"],
        )
        for x in docsDict.values()
    ]

    return PrecisionResponse(documents=docs)
|
||||||
|
|
||||||
|
|
||||||
|
# Multi DOC Chat
@app.post("/chat/docs")
def doc_chat_with_history(data: UserQueryWithChatHistory, response_model=DescriptionResponse):
    # Answers the new user query in the context of a replayed multi-webpage
    # chat. The stored turns are converted back into LangChain messages, a
    # "{input}" human slot is appended, and the assembled prompt is sent to
    # the chat model. Returns the model's answer as DescriptionResponse.
    if(data.apisecretkey != API_SECRET_KEY):
        raise HTTPException(status_code=401, detail="Unauthorized")

    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0,
        max_tokens=None,
        timeout=None,
        api_key=data.openaikey
    )

    chatHistory = []

    for chat in data.chat:
        # 'system' turns carry the retrieved page context (str or
        # List[DocMeta]); it is stringified into the system instruction.
        # NOTE(review): the exact indentation of this multiline literal was
        # reconstructed from a diff rendering — confirm against the original.
        if(chat.type == 'system'):
            chatHistory.append(SystemMessage(content=DATE_TODAY + """You are an helpful assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Context:""" + str(chat.content)))

        if(chat.type == 'ai'):
            chatHistory.append(AIMessage(content=chat.content))

        if(chat.type == 'human'):
            chatHistory.append(HumanMessage(content=chat.content))

    # Template slot that receives the new query at invoke time.
    chatHistory.append(("human", "{input}"));

    qa_prompt = ChatPromptTemplate.from_messages(chatHistory)

    descriptionchain = qa_prompt | llm

    response = descriptionchain.invoke({"input": data.query})

    return DescriptionResponse(response=response.content)
|
||||||
|
|
||||||
|
|
||||||
# DOC DESCRIPTION
|
# DOC DESCRIPTION
|
||||||
@app.post("/kb/doc")
|
@app.post("/kb/doc")
|
||||||
def get_doc_description(data: UserQuery, response_model=DescriptionResponse):
|
def get_doc_description(data: UserQuery, response_model=DescriptionResponse):
|
||||||
|
|
Loading…
Add table
Reference in a new issue