feat: Added Precision Search + Multiple Webpage Sessions Chat

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2024-08-16 20:35:50 -07:00
parent 24a8ebbb34
commit dfeeb58ca2
3 changed files with 163 additions and 19 deletions

View file

@ -85,7 +85,7 @@ CYPHER_GENERATION_PROMPT = PromptTemplate(
)
DOC_DESCRIPTION_TEMPLATE = """Task:Give Detailed Description of the page content of the given document.
DOC_DESCRIPTION_TEMPLATE = DATE_TODAY + """Task:Give Detailed Description of the page content of the given document.
Instructions:
Provide as much details about metadata & page content as if you need to give human readable report of this Browsing session event.
@ -104,4 +104,24 @@ DOCUMENT_METADATA_EXTRACTION_PROMT = ChatPromptTemplate.from_messages([("system"
# Prompt text asking the model to turn a user question into a search
# instruction. It is prefixed with DATE_TODAY (defined elsewhere in this
# module) and exposes two placeholders: {examples} for sample documents and
# {query} for the user's question. The model is told to return only the query.
VECTOR_QUERY_GENERATION_TEMPLATE = DATE_TODAY + """You are a helpful assistant. You are given a user query and the examples of document on which user is asking query about.
Give instruction to machine how to search for the data based on user query.
Document Examples:
{examples}
Note: Only return the Query and nothing else. No explanation.
User Query: {query}
Helpful Answer:"""
# PromptTemplate wrapper around the text above; the declared input variables
# must stay in sync with the {examples} and {query} placeholders.
VECTOR_QUERY_GENERATION_PROMT = PromptTemplate(
input_variables=["examples", "query"], template=VECTOR_QUERY_GENERATION_TEMPLATE
)

View file

@ -1,18 +1,6 @@
from pydantic import BaseModel, Field
from typing import List, Optional
# Request body for a plain user question. All fields are required strings.
# neourl / neouser / neopass presumably carry the Neo4j connection details and
# openaikey the caller's OpenAI key -- confirm against the endpoints using it.
# (Documented with comments rather than a docstring so the generated JSON
# schema is left untouched.)
class UserQuery(BaseModel):
    query: str
    neourl: str
    neouser: str
    neopass: str
    openaikey: str
    apisecretkey: str
# Response wrapper holding a single required text field, `response`.
class DescriptionResponse(BaseModel):
    response: str
class DocMeta(BaseModel):
BrowsingSessionId: Optional[str] = Field(default=None, description="BrowsingSessionId of Document")
VisitedWebPageURL: Optional[str] = Field(default=None, description="VisitedWebPageURL of Document")
@ -22,6 +10,48 @@ class DocMeta(BaseModel):
VisitedWebPageVisitDurationInMilliseconds: Optional[int] = Field(default=None, description="VisitedWebPageVisitDurationInMilliseconds of Document"),
VisitedWebPageContent: Optional[str] = Field(default=None, description="Visited WebPage Content in markdown of Document")
# Request body for the /precision endpoint.
#   sessionid, webpageurl : optional filters, None when omitted.
#   daterange             : read by the endpoint as [start, end] strings.
#   timerange             : read by the endpoint as [min, max] integers.
#   neourl/neouser/neopass: passed to Neo4jGraph as url/username/password.
#   apisecretkey          : compared against API_SECRET_KEY by the endpoint.
# daterange and timerange are annotated Optional but deliberately keep no
# explicit default, exactly as before.
class PrecisionQuery(BaseModel):
    sessionid: Optional[str] = None
    webpageurl: Optional[str] = None
    daterange: Optional[List[str]]
    timerange: Optional[List[int]]
    neourl: str
    neouser: str
    neopass: str
    openaikey: str
    apisecretkey: str
# Response payload of the /precision endpoint: the matching documents,
# each expressed as a DocMeta record.
class PrecisionResponse(BaseModel):
    documents: List[DocMeta]
# Request body for a plain user question. All fields are required strings.
# neourl / neouser / neopass presumably carry the Neo4j connection details and
# openaikey the caller's OpenAI key -- confirm against the endpoints using it.
# (Documented with comments rather than a docstring so the generated JSON
# schema is left untouched.)
class UserQuery(BaseModel):
    query: str
    neourl: str
    neouser: str
    neopass: str
    openaikey: str
    apisecretkey: str
# One entry of a chat transcript.
#   type    : role marker; the /chat/docs handler recognises 'system', 'ai'
#             and 'human' and ignores anything else.
#   content : either plain text or a list of DocMeta records.
class ChatHistory(BaseModel):
    type: str
    content: str | List[DocMeta]
# Request body for the /chat/docs endpoint: the earlier conversation turns
# (`chat`) followed by the new question (`query`), plus the connection and
# credential strings sent with every request. The handler reads `openaikey`
# as the OpenAI API key and checks `apisecretkey` against API_SECRET_KEY.
class UserQueryWithChatHistory(BaseModel):
    chat: List[ChatHistory]
    query: str
    neourl: str
    neouser: str
    neopass: str
    openaikey: str
    apisecretkey: str
# Response wrapper holding a single required text field, `response`.
class DescriptionResponse(BaseModel):
    response: str
class RetrivedDocListItem(BaseModel):
metadata: DocMeta
pageContent: str

View file

@ -6,9 +6,11 @@ from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Neo4jVector
from envs import ACCESS_TOKEN_EXPIRE_MINUTES, ALGORITHM, API_SECRET_KEY, SECRET_KEY
from prompts import CYPHER_QA_PROMPT, DOC_DESCRIPTION_PROMPT, GRAPH_QUERY_GEN_PROMPT, SIMILARITY_SEARCH_PROMPT , CYPHER_GENERATION_PROMPT, DOCUMENT_METADATA_EXTRACTION_PROMT
from pydmodels import DescriptionResponse, UserQuery, DocMeta, RetrivedDocList, UserQueryResponse, VectorSearchQuery
from prompts import CYPHER_QA_PROMPT, DATE_TODAY, DOC_DESCRIPTION_PROMPT, GRAPH_QUERY_GEN_PROMPT, SIMILARITY_SEARCH_PROMPT , CYPHER_GENERATION_PROMPT, DOCUMENT_METADATA_EXTRACTION_PROMT
from pydmodels import DescriptionResponse, PrecisionQuery, PrecisionResponse, UserQuery, DocMeta, RetrivedDocList, UserQueryResponse, UserQueryWithChatHistory, VectorSearchQuery
from langchain_experimental.text_splitter import SemanticChunker
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
#Our Imps
from LLMGraphTransformer import LLMGraphTransformer
@ -32,7 +34,7 @@ app = FastAPI()
# GraphCypherQAChain
@app.post("/")
def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
@ -53,9 +55,9 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
# Query Expansion
searchchain = GRAPH_QUERY_GEN_PROMPT | llm
qry = searchchain.invoke({"question": data.query, "context": examples})
# qry = searchchain.invoke({"question": data.query, "context": examples})
query = qry.content
query = data.query #qry.content
embeddings = OpenAIEmbeddings(
model="text-embedding-ada-002",
@ -84,7 +86,7 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
embedding_node_property="embedding",
)
docs = vector_index.similarity_search(query,k=5)
docs = vector_index.similarity_search(data.query,k=5)
docstoreturn = []
@ -144,7 +146,99 @@ def get_user_query_response(data: UserQuery, response_model=UserQueryResponse):
response = searchchain.invoke({"question": data.query, "context": docs})
return UserQueryResponse(relateddocs=docstoreturn,response=response.content)
#RETURN n LIMIT 25;
def _build_precision_cypher(sessionid, webpageurl, daterange, timerange):
    """Build the parameterised Cypher statement for a precision search.

    Client-supplied values are never spliced into the query text; they are
    returned separately so the database driver binds them as Cypher
    parameters. This closes the injection hole of the earlier
    string-concatenated query.

    Args:
        sessionid: Exact BrowsingSessionId to match; falsy means "any".
        webpageurl: Text that VisitedWebPageURL must contain (lower-cased
            before matching); falsy means "any".
        daterange: ``[start, end]`` strings compared inclusively against
            VisitedWebPageDateWithTimeInISOString, or None for no date filter.
        timerange: ``[min, max]`` visit-duration bounds in milliseconds.
            None, or a range whose lower bound is not below its upper bound,
            falls back to "duration >= 0".

    Returns:
        A ``(query, params)`` tuple ready for ``Neo4jGraph.query``.

    Raises:
        ValueError: If a supplied range has fewer than two items.
    """
    conditions = []
    params = {}

    if daterange is not None:
        if len(daterange) < 2:
            raise ValueError("daterange must contain a start and an end value")
        conditions.append("d.VisitedWebPageDateWithTimeInISOString >= $date_from")
        conditions.append("d.VisitedWebPageDateWithTimeInISOString <= $date_to")
        params["date_from"] = daterange[0]
        params["date_to"] = daterange[1]

    if timerange is not None and len(timerange) < 2:
        raise ValueError("timerange must contain a minimum and a maximum value")
    if timerange is None or timerange[0] >= timerange[1]:
        # Degenerate or missing range: only require that a duration exists.
        conditions.append("d.VisitedWebPageVisitDurationInMilliseconds >= 0")
    else:
        conditions.append("d.VisitedWebPageVisitDurationInMilliseconds >= $min_duration")
        conditions.append("d.VisitedWebPageVisitDurationInMilliseconds <= $max_duration")
        params["min_duration"] = timerange[0]
        params["max_duration"] = timerange[1]

    if webpageurl:
        conditions.append("d.VisitedWebPageURL CONTAINS $url_fragment")
        params["url_fragment"] = webpageurl.lower()

    if sessionid:
        conditions.append("d.BrowsingSessionId = $session_id")
        params["session_id"] = sessionid

    # The duration condition is always present, so the WHERE clause is never empty.
    query = "MATCH (d:Document) WHERE " + " AND ".join(conditions) + " RETURN d;"
    return query, params


# Precision search: return stored documents filtered by date range, visit
# duration, URL fragment and browsing session, merged per browsing session.
#
# Responds 401 when the API secret does not match and 400 when a supplied
# range has fewer than two items. A missing daterange/timerange no longer
# crashes the handler: the corresponding filter is skipped or relaxed.
#
# Kept as comments rather than a docstring because FastAPI publishes endpoint
# docstrings as the route description in the OpenAPI schema.
# NOTE(review): response_model is declared as a function parameter, matching
# the other endpoints in this file; FastAPI documents it as a decorator
# argument -- confirm before changing, since that alters the HTTP interface.
@app.post("/precision")
def get_precision_search_response(data: PrecisionQuery, response_model=PrecisionResponse):
    if data.apisecretkey != API_SECRET_KEY:
        raise HTTPException(status_code=401, detail="Unauthorized")

    try:
        cypher, params = _build_precision_cypher(
            data.sessionid, data.webpageurl, data.daterange, data.timerange
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err)) from err

    graph = Neo4jGraph(url=data.neourl, username=data.neouser, password=data.neopass)
    rows = graph.query(cypher, params=params)

    # Several Document nodes can share one browsing session: keep the first
    # node's metadata and append the text of every later node to it.
    merged = {}
    for row in rows:
        node = row['d']
        session_id = node['BrowsingSessionId']
        if session_id not in merged:
            # Copy so the driver's result rows are not mutated while merging.
            merged[session_id] = dict(node)
        else:
            merged[session_id]['text'] += node['text']

    docs = [
        DocMeta(
            BrowsingSessionId=doc['BrowsingSessionId'],
            VisitedWebPageURL=doc['VisitedWebPageURL'],
            VisitedWebPageVisitDurationInMilliseconds=doc['VisitedWebPageVisitDurationInMilliseconds'],
            VisitedWebPageTitle=doc['VisitedWebPageTitle'],
            VisitedWebPageReffererURL=doc['VisitedWebPageReffererURL'],
            VisitedWebPageDateWithTimeInISOString=doc['VisitedWebPageDateWithTimeInISOString'],
            VisitedWebPageContent=doc['text'],
        )
        for doc in merged.values()
    ]

    return PrecisionResponse(documents=docs)
# Multi DOC Chat
# Multi-document chat: replay the supplied chat history to the model and
# answer data.query as the final human turn.
#
# Responds 401 when the API secret does not match. History entries whose
# type is not 'system', 'ai' or 'human' are ignored. 'system' entries are
# wrapped in the question-answering instructions with their content appended
# as context.
# (Comments instead of a docstring so the generated OpenAPI description is
# left as it was.)
@app.post("/chat/docs")
def doc_chat_with_history(data: UserQueryWithChatHistory, response_model=DescriptionResponse):
    if data.apisecretkey != API_SECRET_KEY:
        raise HTTPException(status_code=401, detail="Unauthorized")

    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0,
        max_tokens=None,
        timeout=None,
        api_key=data.openaikey,
    )

    messages = []
    for turn in data.chat:
        role = turn.type
        if role == 'system':
            system_text = DATE_TODAY + """You are an helpful assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Context:""" + str(turn.content)
            messages.append(SystemMessage(content=system_text))
        elif role == 'ai':
            messages.append(AIMessage(content=turn.content))
        elif role == 'human':
            messages.append(HumanMessage(content=turn.content))

    # The new question is the only templated message; it is filled at invoke time.
    messages.append(("human", "{input}"))

    chain = ChatPromptTemplate.from_messages(messages) | llm
    answer = chain.invoke({"input": data.query})

    return DescriptionResponse(response=answer.content)
# DOC DESCRIPTION
@app.post("/kb/doc")
def get_doc_description(data: UserQuery, response_model=DescriptionResponse):