From 7ff228f576ed8f5ce9773cfa6077e08ca27d6e80 Mon Sep 17 00:00:00 2001 From: Dhravya Date: Sat, 13 Apr 2024 20:33:53 -0700 Subject: [PATCH] attempt to fix browser rendering --- apps/cf-ai-backend/src/routes/getPageContent.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/cf-ai-backend/src/routes/getPageContent.ts b/apps/cf-ai-backend/src/routes/getPageContent.ts index d380657e..4c465514 100644 --- a/apps/cf-ai-backend/src/routes/getPageContent.ts +++ b/apps/cf-ai-backend/src/routes/getPageContent.ts @@ -4,6 +4,7 @@ import { CloudflareVectorizeStore } from '@langchain/cloudflare'; import { Request } from '@cloudflare/workers-types'; import puppeteer from '@cloudflare/puppeteer'; +// TODO: THIS DOESN'T WORK PROPERLY. FOR EG, FOR THIS URL https://dev.to/challenges/cloudflare, IT DOESN'T RETURN FULL CONTENT export async function GET(request: Request, _: CloudflareVectorizeStore, embeddings: OpenAIEmbeddings, model: GenerativeModel, env?: Env) { const { searchParams } = new URL(request.url); let url = searchParams.get('url'); @@ -14,7 +15,8 @@ export async function GET(request: Request, _: CloudflareVectorizeStore, embeddi const page = await browser.newPage(); await page.goto(url); - // Innertext of content + await page.waitForSelector('body'); + const contentElement = await page.$('body'); const content = await page.evaluate((element) => element.innerText, contentElement);