// Mirror of https://github.com/MODSetter/SurfSense.git
// Synced 2025-09-01 18:19:08 +00:00
// 142 lines, 4.3 KiB, TypeScript
import type { PlasmoMessaging } from "@plasmohq/messaging";
|
|
|
|
import { Storage } from "@plasmohq/storage";
|
|
import { convertHtmlToMarkdown } from "dom-to-semantic-markdown";
|
|
import { DOMParser } from "linkedom";
|
|
import { getRenderedHtml, webhistoryToLangChainDocument } from "~utils/commons";
|
|
import type { WebHistory } from "~utils/interfaces";
|
|
|
|
/**
 * Installs a minimal `Node` global that exposes only the DOM `nodeType`
 * constants.
 *
 * NOTE(review): presumably required because this module runs in an extension
 * background context where no DOM `Node` exists, while the HTML-to-Markdown
 * conversion used by the handler below reads `Node.*_NODE` — confirm against
 * the libraries in use.
 *
 * The global is only installed when it is missing, so a real DOM `Node`
 * constructor is never replaced by this plain object.
 */
const nodeGlobalHost = globalThis as { Node?: unknown };

if (nodeGlobalHost.Node === undefined) {
  nodeGlobalHost.Node = {
    ELEMENT_NODE: 1,
    ATTRIBUTE_NODE: 2,
    TEXT_NODE: 3,
    CDATA_SECTION_NODE: 4,
    PROCESSING_INSTRUCTION_NODE: 7,
    COMMENT_NODE: 8,
    DOCUMENT_NODE: 9,
    DOCUMENT_TYPE_NODE: 10,
    DOCUMENT_FRAGMENT_NODE: 11,
  };
}
|
|
|
|
/**
 * Background message handler that snapshots the active tab and uploads it to
 * the backend as an `EXTENSION` document.
 *
 * Flow:
 *  1. Find the active tab of the current window.
 *  2. Run `getRenderedHtml` in that tab and convert the returned HTML to
 *     Markdown.
 *  3. Derive the visit duration and referrer URL from the `timeQueueList` /
 *     `urlQueueList` entries stored for this tab id.
 *  4. Convert the entry with `webhistoryToLangChainDocument` and POST it to
 *     `${PLASMO_PUBLIC_BACKEND_URL}/api/v1/documents/` using the stored
 *     `token` and `search_space_id`.
 *
 * Replies with `{ message: "Snapshot Saved Successfully" }` on success. On any
 * failure (no active tab, missing history, HTTP error, ...) it logs the error
 * and replies with `{ message: "Snapshot Failed", error }` so the sender is
 * never left waiting for a response.
 *
 * @param req - Incoming message; not read by this handler.
 * @param res - Response channel used to report the outcome to the sender.
 */
const handler: PlasmoMessaging.MessageHandler = async (req, res) => {
  try {
    // Use the promise form of the API: with the callback form, errors thrown
    // inside the async callback would escape this try/catch entirely.
    const tabs = await chrome.tabs.query({ active: true, currentWindow: true });
    const tab = tabs[0];
    if (!tab?.id) {
      throw new Error("No active tab with an id was found");
    }
    const tabId: number = tab.id;
    console.log("tabs", tabs);

    const storage = new Storage({ area: "local" });

    const result = await chrome.scripting.executeScript({
      target: { tabId },
      // @ts-ignore
      func: getRenderedHtml,
      // world: "MAIN"
    });
    console.log("SnapRes", result);

    // Expected to carry { renderedHtml, title, url, entryTime } as produced by
    // getRenderedHtml; the script result is absent when injection failed.
    const toPushInTabHistory: any = result[0]?.result;
    if (!toPushInTabHistory) {
      throw new Error("Could not read the rendered page from the active tab");
    }

    toPushInTabHistory.pageContentMarkdown = convertHtmlToMarkdown(
      toPushInTabHistory.renderedHtml,
      {
        extractMainContent: true,
        enableTableColumnTracking: true,
        includeMetaData: false,
        overrideDOMParser: new DOMParser(),
      }
    );

    // The raw HTML is no longer needed once the Markdown has been produced.
    delete toPushInTabHistory.renderedHtml;
    console.log("toPushInTabHistory", toPushInTabHistory);

    const urlQueueListObj: any = await storage.get("urlQueueList");
    const timeQueueListObj: any = await storage.get("timeQueueList");

    const urlQueueEntry = urlQueueListObj?.urlQueueList?.find(
      (data: WebHistory) => data.tabsessionId === tabId
    );
    const timeQueueEntry = timeQueueListObj?.timeQueueList?.find(
      (data: WebHistory) => data.tabsessionId === tabId
    );
    if (!urlQueueEntry?.urlQueue || !timeQueueEntry?.timeQueue) {
      throw new Error(`No navigation history is recorded for tab ${tabId}`);
    }

    // Duration = time the snapshot was taken minus the last recorded entry time.
    toPushInTabHistory.duration =
      toPushInTabHistory.entryTime -
      timeQueueEntry.timeQueue[timeQueueEntry.timeQueue.length - 1];

    // A single URL in the queue means this page started the session;
    // otherwise the referrer is the URL visited just before the current one.
    if (urlQueueEntry.urlQueue.length === 1) {
      toPushInTabHistory.reffererUrl = "START";
    } else if (urlQueueEntry.urlQueue.length > 1) {
      toPushInTabHistory.reffererUrl =
        urlQueueEntry.urlQueue[urlQueueEntry.urlQueue.length - 2];
    }

    const toSaveFinally: any[] = [
      ...webhistoryToLangChainDocument(tabId, [toPushInTabHistory]),
    ];
    console.log("toSaveFinally", toSaveFinally);

    // Log first item to debug metadata structure
    if (toSaveFinally.length > 0) {
      console.log("First item metadata:", toSaveFinally[0].metadata);
    }

    // Build the documents payload; every metadata field is sent as a string.
    const content = toSaveFinally.map((item) => ({
      metadata: {
        BrowsingSessionId: String(item.metadata.BrowsingSessionId || ""),
        VisitedWebPageURL: String(item.metadata.VisitedWebPageURL || ""),
        VisitedWebPageTitle: String(item.metadata.VisitedWebPageTitle || "No Title"),
        VisitedWebPageDateWithTimeInISOString: String(
          item.metadata.VisitedWebPageDateWithTimeInISOString || ""
        ),
        VisitedWebPageReffererURL: String(item.metadata.VisitedWebPageReffererURL || ""),
        VisitedWebPageVisitDurationInMilliseconds: String(
          item.metadata.VisitedWebPageVisitDurationInMilliseconds || "0"
        ),
      },
      pageContent: String(item.pageContent || ""),
    }));

    const token = await storage.get("token");
    const search_space_id = Number.parseInt(
      String(await storage.get("search_space_id")),
      10
    );
    if (Number.isNaN(search_space_id)) {
      // NaN would be serialised as `null` in the request body.
      throw new Error("No valid search_space_id is stored");
    }

    const toSend = {
      document_type: "EXTENSION",
      content: content,
      search_space_id: search_space_id,
    };

    const response = await fetch(
      `${process.env.PLASMO_PUBLIC_BACKEND_URL}/api/v1/documents/`,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${token}`,
        },
        body: JSON.stringify(toSend),
      }
    );

    // fetch() resolves for HTTP error statuses too, so check explicitly
    // instead of treating any response body as success.
    if (!response.ok) {
      throw new Error(`Backend responded with HTTP ${response.status}`);
    }

    res.send({
      message: "Snapshot Saved Successfully",
    });
  } catch (error) {
    console.error("Failed to save snapshot", error);
    res.send({
      message: "Snapshot Failed",
      error: error instanceof Error ? error.message : String(error),
    });
  }
};

export default handler;
|