Better parsing of pasted HTML as Markdown so agents can understand it (#9190)

Signed-off-by: Douwe Osinga <douwe@squareup.com>
Co-authored-by: Douwe Osinga <douwe@squareup.com>
This commit is contained in:
Douwe Osinga 2026-05-14 12:57:18 -04:00 committed by GitHub
parent 4671053985
commit ffd2349b0e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 88 additions and 25 deletions

View file

@ -96,6 +96,7 @@
"swr": "^2.4.0",
"tailwind-merge": "^3.5.0",
"tailwindcss-animate": "^1.0.7",
"turndown": "^7.2.4",
"tw-animate-css": "^1.4.0",
"unist-util-visit": "^5.1.0",
"uuid": "^13.0.0",
@ -132,6 +133,7 @@
"@types/react-dom": "^19.2.3",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/shell-quote": "^1.7.5",
"@types/turndown": "^5.0.6",
"@types/yauzl": "^2.10.3",
"@typescript-eslint/eslint-plugin": "^8.56.1",
"@typescript-eslint/parser": "^8.56.1",

View file

@ -44,6 +44,29 @@ import { UserInput, ImageData } from '../types/message';
import { compressImageDataUrl } from '../utils/conversionUtils';
import { fetchCanonicalModelInfo } from '../utils/canonical';
import { defineMessages, useIntl } from '../i18n';
import TurndownService from 'turndown';
/**
 * Module-level HTML-to-Markdown converter, used by the paste handler to turn
 * rich clipboard HTML into Markdown text.
 *
 * Configured for ATX headings (`# Title`), `-` bullet markers, and fenced
 * code blocks.
 */
const turndown = new TurndownService({
  headingStyle: 'atx',
  bulletListMarker: '-',
  codeBlockStyle: 'fenced',
});

/**
 * Rule for anchors whose text content spans several lines (for example a link
 * wrapping block-level markup). The converted label is collapsed onto a single
 * line so the result stays a well-formed inline Markdown link.
 */
turndown.addRule('complexLinks', {
  // Only anchors that carry a non-empty href and contain a line break in their text.
  filter: (node) =>
    node.nodeName === 'A' &&
    !!node.getAttribute('href') &&
    /\n/.test(node.textContent || ''),
  replacement: (content, node) => {
    const el = node as HTMLElement;
    // The filter guarantees a non-empty href; `?? ''` replaces a non-null assertion.
    // Parentheses are backslash-escaped so an unbalanced `(` or `)` in the URL
    // cannot terminate the Markdown link destination early.
    const href = (el.getAttribute('href') ?? '').replace(/([()])/g, '\\$1');
    // Collapse each run of newlines in the already-converted label to one space.
    const label = content.replace(/\n+/g, ' ').trim();
    return `[${label}](${href})`;
  },
});
interface PastedImage {
id: string;
@ -805,10 +828,40 @@ export default function ChatInput({
}, [droppedFiles.length, localDroppedFiles.length, onFilesProcessed, setLocalDroppedFiles]);
const handlePaste = async (evt: React.ClipboardEvent<HTMLTextAreaElement>) => {
if (isRecording) return;
const files = Array.from(evt.clipboardData.files || []);
const imageFiles = files.filter((file) => file.type.startsWith('image/'));
if (imageFiles.length === 0) return;
if (imageFiles.length === 0) {
const html = evt.clipboardData.getData('text/html');
if (html) {
const doc = new DOMParser().parseFromString(html, 'text/html');
const hasLinks = doc.querySelectorAll('a[href]').length > 0;
if (hasLinks) {
const markdown = turndown.turndown(doc.body).trim();
if (markdown) {
evt.preventDefault();
const textarea = textAreaRef.current;
if (textarea) {
const start = textarea.selectionStart;
const end = textarea.selectionEnd;
const newValue =
displayValue.substring(0, start) + markdown + displayValue.substring(end);
const cursorPos = start + markdown.length;
setDisplayValue(newValue);
updateValue(newValue);
setHasUserTyped(true);
checkForMentionOrSlash(newValue, cursorPos, textarea);
requestAnimationFrame(() => {
textarea.selectionStart = textarea.selectionEnd = cursorPos;
});
}
}
}
}
return;
}
// Check if adding these images would exceed the limit
if (pastedImages.length + imageFiles.length > MAX_IMAGES_PER_MESSAGE) {

56
ui/pnpm-lock.yaml generated
View file

@ -171,6 +171,9 @@ importers:
tailwindcss-animate:
specifier: ^1.0.7
version: 1.0.7(tailwindcss@4.2.2)
turndown:
specifier: ^7.2.4
version: 7.2.4
tw-animate-css:
specifier: ^1.4.0
version: 1.4.0
@ -274,6 +277,9 @@ importers:
'@types/shell-quote':
specifier: ^1.7.5
version: 1.7.5
'@types/turndown':
specifier: ^5.0.6
version: 5.0.6
'@types/yauzl':
specifier: ^2.10.3
version: 2.10.3
@ -868,6 +874,10 @@ packages:
peerDependencies:
'@babel/core': ^7.0.0-0
'@babel/runtime@7.28.6':
resolution: {integrity: sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==}
engines: {node: '>=6.9.0'}
'@babel/runtime@7.29.2':
resolution: {integrity: sha512-JiDShH45zKHWyGe4ZNVRrCjBz8Nh9TMmZG1kh4QTK8hCBTWBi8Da+i7s1fJw7/lYpM4ccepSNfqzZ/QvABBi5g==}
engines: {node: '>=6.9.0'}
@ -1927,6 +1937,9 @@ packages:
'@mermaid-js/parser@1.1.0':
resolution: {integrity: sha512-gxK9ZX2+Fex5zu8LhRQoMeMPEHbc73UKZ0FQ54YrQtUxE1VVhMwzeNtKRPAu5aXks4FasbMe4xB4bWrmq6Jlxw==}
'@mixmark-io/domino@2.2.0':
resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==}
'@modelcontextprotocol/ext-apps@0.3.1':
resolution: {integrity: sha512-Iivz2KwWK8xlRbiWwFB/C4NXqE8VJBoRCbBkJCN98ST2UbQvA6kfyebcLsypiqylJS467XOOaBcI9DeQ3t+zqA==}
peerDependencies:
@ -3832,6 +3845,9 @@ packages:
'@types/trusted-types@2.0.7':
resolution: {integrity: sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==}
'@types/turndown@5.0.6':
resolution: {integrity: sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==}
'@types/unist@2.0.11':
resolution: {integrity: sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==}
@ -8247,6 +8263,10 @@ packages:
engines: {node: '>=18.0.0'}
hasBin: true
turndown@7.2.4:
resolution: {integrity: sha512-I8yFsfRzmzK0WV1pNNOA4A7y4RDfFxPRxb3t+e3ui14qSGOxGtiSP6GjeX+Y6CHb7HYaFj7ECUD7VE5kQMZWGQ==}
engines: {node: '>=18', npm: '>=9'}
tw-animate-css@1.4.0:
resolution: {integrity: sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==}
@ -8978,6 +8998,8 @@ snapshots:
'@babel/core': 7.29.0
'@babel/helper-plugin-utils': 7.28.6
'@babel/runtime@7.28.6': {}
'@babel/runtime@7.29.2': {}
'@babel/template@7.28.6':
@ -10306,7 +10328,7 @@ snapshots:
'@mcp-ui/client@7.1.0(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
dependencies:
'@modelcontextprotocol/ext-apps': 1.2.2(@modelcontextprotocol/sdk@1.27.1(zod@3.25.76))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(zod@3.25.76)
'@modelcontextprotocol/sdk': 1.27.1(zod@4.3.6)
'@modelcontextprotocol/sdk': 1.27.1(zod@3.25.76)
react: 19.2.4
react-dom: 19.2.4(react@19.2.4)
zod: 3.25.76
@ -10318,6 +10340,8 @@ snapshots:
dependencies:
langium: 4.2.2
'@mixmark-io/domino@2.2.0': {}
'@modelcontextprotocol/ext-apps@0.3.1(@modelcontextprotocol/sdk@1.27.1(zod@3.25.76))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(zod@3.25.76)':
dependencies:
'@modelcontextprotocol/sdk': 1.27.1(zod@3.25.76)
@ -10373,28 +10397,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@modelcontextprotocol/sdk@1.27.1(zod@4.3.6)':
dependencies:
'@hono/node-server': 1.19.11(hono@4.12.8)
ajv: 8.18.0
ajv-formats: 3.0.1(ajv@8.18.0)
content-type: 1.0.5
cors: 2.8.6
cross-spawn: 7.0.6
eventsource: 3.0.7
eventsource-parser: 3.0.6
express: 5.2.1
express-rate-limit: 8.3.1(express@5.2.1)
hono: 4.12.8
jose: 6.2.2
json-schema-typed: 8.0.2
pkce-challenge: 5.0.1
raw-body: 3.0.2
zod: 4.3.6
zod-to-json-schema: 3.25.1(zod@3.25.76)
transitivePeerDependencies:
- supports-color
'@napi-rs/wasm-runtime@1.1.1':
dependencies:
'@emnapi/core': 1.9.1
@ -11806,7 +11808,7 @@ snapshots:
'@testing-library/dom@10.4.1':
dependencies:
'@babel/code-frame': 7.29.0
'@babel/runtime': 7.29.2
'@babel/runtime': 7.28.6
'@types/aria-query': 5.0.4
aria-query: 5.3.0
dom-accessibility-api: 0.5.16
@ -12194,6 +12196,8 @@ snapshots:
'@types/trusted-types@2.0.7':
optional: true
'@types/turndown@5.0.6': {}
'@types/unist@2.0.11': {}
'@types/unist@3.0.3': {}
@ -17531,6 +17535,10 @@ snapshots:
optionalDependencies:
fsevents: 2.3.3
turndown@7.2.4:
dependencies:
'@mixmark-io/domino': 2.2.0
tw-animate-css@1.4.0: {}
type-check@0.4.0: