feat(browser-extension): webpage capture with markdown conversion (#548)

Improved browser extension memory saving with better content handling and added markdown conversion.

### What changed?

- Enhanced memory content handling in the background script to choose the saved content in priority order: explicit content first, then highlighted text, markdown, HTML, and finally the bare URL
- Added HTML to markdown conversion using TurndownService when saving entire pages
- Improved HTML handling by removing script tags before processing
- Updated the web app to display the saved URL from metadata when available
- Added turndown library and its type definitions as dependencies
This commit is contained in:
MaheshtheDev 2025-11-03 00:41:58 +00:00
parent a2071d0deb
commit 8d8d6d91aa
8 changed files with 116 additions and 17 deletions

View file

@ -117,12 +117,36 @@ export default defineBackground(() => {
console.warn("Failed to get default project, using fallback:", error)
}
let content: string
if (data.content) {
content = data.content
} else if (data.highlightedText) {
content = `${data.highlightedText}\n\n${data?.url || ""}`
} else if (data.markdown) {
content = `${data.markdown}\n\n${data?.url || ""}`
} else if (data.html) {
content = `${data.html}\n\n${data?.url || ""}`
} else {
content = data?.url || ""
}
const metadata: MemoryPayload["metadata"] = {
sm_source: "consumer",
website_url: data.url,
}
if (data.ogImage) {
metadata.website_og_image = data.ogImage
}
if (data.title) {
metadata.website_title = data.title
}
const payload: MemoryPayload = {
containerTags: [containerTag],
content:
data.content ||
`${data.highlightedText}\n\n${data.html}\n\n${data?.url}`,
metadata: { sm_source: "consumer" },
content,
metadata,
}
const responseData = await saveMemory(payload)

View file

@ -1,5 +1,6 @@
import { MESSAGE_TYPES, STORAGE_KEYS } from "../../utils/constants"
import { DOMUtils } from "../../utils/ui-components"
import { default as TurndownService } from "turndown"
export async function saveMemory() {
try {
@ -7,15 +8,64 @@ export async function saveMemory() {
const highlightedText = window.getSelection()?.toString() || ""
const url = window.location.href
const html = document.documentElement.outerHTML
const ogImage =
document
.querySelector('meta[property="og:image"]')
?.getAttribute("content") ||
document
.querySelector('meta[name="og:image"]')
?.getAttribute("content") ||
undefined
const title =
document
.querySelector('meta[property="og:title"]')
?.getAttribute("content") ||
document
.querySelector('meta[name="og:title"]')
?.getAttribute("content") ||
document.title ||
undefined
const data: {
html?: string
markdown?: string
highlightedText?: string
url: string
ogImage?: string
title?: string
} = {
url,
}
if (ogImage) {
data.ogImage = ogImage
}
if (title) {
data.title = title
}
if (highlightedText) {
data.highlightedText = highlightedText
} else {
const bodyClone = document.body.cloneNode(true) as HTMLElement
const scripts = bodyClone.querySelectorAll("script")
for (const script of scripts) {
script.remove()
}
const html = bodyClone.innerHTML
// Convert HTML to markdown
const turndownService = new TurndownService()
const markdown = turndownService.turndown(html)
data.markdown = markdown
}
const response = await browser.runtime.sendMessage({
action: MESSAGE_TYPES.SAVE_MEMORY,
data: {
html,
highlightedText,
url,
},
data,
actionSource: "context_menu",
})
@ -74,4 +124,4 @@ export function setupStorageListener() {
)
}
})
}
}

View file

@ -20,12 +20,14 @@
"posthog-js": "^1.261.7",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"tailwindcss": "^4.1.12"
"tailwindcss": "^4.1.12",
"turndown": "^7.1.3"
},
"devDependencies": {
"@types/chrome": "^0.1.4",
"@types/react": "^19.1.2",
"@types/react-dom": "^19.1.3",
"@types/turndown": "^5.0.5",
"@wxt-dev/module-react": "^1.1.3",
"typescript": "^5.8.3",
"wxt": "^0.20.6"

View file

@ -32,9 +32,12 @@ export interface ExtensionMessage {
*/
export interface MemoryData {
  // Page identity / preview fields.
  /** Address of the captured page; the content script sends `window.location.href`. */
  url?: string
  /** Page title; the content script reads `og:title` and falls back to `document.title`. */
  title?: string
  /** Preview image URL taken from the page's `og:image` meta tag, when one is present. */
  ogImage?: string

  // Candidate bodies for the saved memory. The background script uses the
  // first one that is set: content, highlightedText, markdown, then html.
  /** Explicit content supplied by the sender. */
  content?: string
  /** Text the user had selected on the page. */
  highlightedText?: string
  /** Markdown produced from the page body when nothing was selected. */
  markdown?: string
  /** Raw page HTML. */
  html?: string
}
/**

View file

@ -83,12 +83,25 @@ const DocumentCard = memo(
)
}
if (document.url?.includes("https://")) {
// Check if this is a website document saved from the Chrome extension
const websiteUrl =
(document.metadata?.website_url as string | undefined) ||
(document.url?.includes("https://") ? document.url : undefined)
if (websiteUrl) {
return (
<WebsiteCard
url={document.url}
title={document.title || "Untitled Document"}
image={document.ogImage}
url={websiteUrl}
title={
(document.metadata?.website_title as string | undefined) ||
document.title ||
"Untitled Document"
}
image={
(document.metadata?.website_og_image as string | undefined) ||
document.ogImage
}
description={document.content && typeof document.content === "string" ? document.content : undefined}
onOpenDetails={() => onOpenDetails(document)}
onDelete={() => onDelete(document)}
/>

View file

@ -48,6 +48,9 @@ export const getSourceUrl = (document: DocumentWithMemories) => {
if (document.type === "google_slide" && document.customId) {
return `https://docs.google.com/presentation/d/${document.customId}`
}
if(document.metadata?.website_url) {
return document.metadata?.website_url as string
}
// Fallback to existing URL for all other document types
return document.url
}

View file

@ -161,7 +161,7 @@ export const MemoryDetail = memo(
<span>{formatDate(document.createdAt)}</span>
</div>
</div>
{document.url && (
{(document.url || document.metadata?.website_url) && (
<div className="flex items-end">
<Button
onClick={() => {

View file

@ -57,11 +57,13 @@
"react": "^19.1.0",
"react-dom": "^19.1.0",
"tailwindcss": "^4.1.12",
"turndown": "^7.1.3",
},
"devDependencies": {
"@types/chrome": "^0.1.4",
"@types/react": "^19.1.2",
"@types/react-dom": "^19.1.3",
"@types/turndown": "^5.0.5",
"@wxt-dev/module-react": "^1.1.3",
"typescript": "^5.8.3",
"wxt": "^0.20.6",
@ -1792,6 +1794,8 @@
"@types/trusted-types": ["@types/trusted-types@2.0.7", "", {}, "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw=="],
"@types/turndown": ["@types/turndown@5.0.6", "", {}, "sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg=="],
"@types/unist": ["@types/unist@3.0.3", "", {}, "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="],
"@types/urijs": ["@types/urijs@1.19.25", "", {}, "sha512-XOfUup9r3Y06nFAZh3WvO0rBU4OtlfPB/vgxpjg+NRdGU6CN6djdc6OEiH+PcqHCY6eFLo9Ista73uarf4gnBg=="],